diff mbox series

[v5,bpf-next,4/8] bpf: implement numbers iterator

Message ID 20230308184121.1165081-5-andrii@kernel.org (mailing list archive)
State Accepted
Commit 6018e1f407cccf39b804d1f75ad4de7be4e6cc45
Delegated to: BPF
Headers show
Series BPF open-coded iterators | expand

Checks

Context Check Description
bpf/vmtest-bpf-next-PR success PR summary
bpf/vmtest-bpf-next-VM_Test-39 success Logs for test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-40 success Logs for test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-2 success Logs for build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-3 success Logs for build for aarch64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-4 success Logs for build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-9 success Logs for test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-12 success Logs for test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-13 success Logs for test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-19 success Logs for test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-22 success Logs for test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-23 success Logs for test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-24 success Logs for test_progs_no_alu32_parallel on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-25 success Logs for test_progs_no_alu32_parallel on aarch64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-27 success Logs for test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-28 success Logs for test_progs_no_alu32_parallel on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-29 success Logs for test_progs_parallel on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-30 success Logs for test_progs_parallel on aarch64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-32 success Logs for test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-33 success Logs for test_progs_parallel on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-34 success Logs for test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-35 success Logs for test_verifier on aarch64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-37 success Logs for test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-38 success Logs for test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-10 success Logs for test_maps on aarch64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-14 success Logs for test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-15 success Logs for test_progs on aarch64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-17 success Logs for test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-18 success Logs for test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-20 success Logs for test_progs_no_alu32 on aarch64 with llvm-17
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit fail Errors and warnings before: 1787 this patch: 1790
netdev/cc_maintainers warning 8 maintainers not CCed: song@kernel.org sdf@google.com haoluo@google.com yhs@fb.com john.fastabend@gmail.com kpsingh@kernel.org jolsa@kernel.org martin.lau@linux.dev
netdev/build_clang success Errors and warnings before: 178 this patch: 178
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn fail Errors and warnings before: 1783 this patch: 1786
netdev/checkpatch fail CHECK: multiple assignments should be avoided ERROR: "foo* bar" should be "foo *bar" WARNING: line length of 86 exceeds 80 columns WARNING: line length of 96 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next-VM_Test-16 fail Logs for test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-36 success Logs for test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-21 success Logs for test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-26 success Logs for test_progs_no_alu32_parallel on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-31 success Logs for test_progs_parallel on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-11 success Logs for test_maps on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-5 success Logs for build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-6 success Logs for build for x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-7 success Logs for llvm-toolchain
bpf/vmtest-bpf-next-VM_Test-8 success Logs for set-matrix

Commit Message

Andrii Nakryiko March 8, 2023, 6:41 p.m. UTC
Implement the first open-coded iterator type over a range of integers.

It's public API consists of:
  - bpf_iter_num_new() constructor, which accepts [start, end) range
    (that is, start is inclusive, end is exclusive).
  - bpf_iter_num_next() which will keep returning read-only pointer to int
    until the range is exhausted, at which point NULL will be returned.
    If bpf_iter_num_next() is kept calling after this, NULL will be
    persistently returned.
  - bpf_iter_num_destroy() destructor, which needs to be called at some
    point to clean up iterator state. BPF verifier enforces that iterator
    destructor is called at some point before BPF program exits.

Note that `start = end = X` is a valid combination to setup an empty
iterator. bpf_iter_num_new() will return 0 (success) for any such
combination.

If bpf_iter_num_new() detects invalid combination of input arguments, it
returns error, resets iterator state to, effectively, empty iterator, so
any subsequent call to bpf_iter_num_next() will keep returning NULL.

BPF verifier has no knowledge that returned integers are in the
[start, end) value range, as both `start` and `end` are not statically
known and enforced: they are runtime values.

While the implementation is pretty trivial, some care needs to be taken
to avoid overflows and underflows. Subsequent selftests will validate
correctness of [start, end) semantics, especially around extremes
(INT_MIN and INT_MAX).

Similarly to bpf_loop(), we enforce that no more than BPF_MAX_LOOPS can
be specified.

bpf_iter_num_{new,next,destroy}() is a logical evolution from bounded
BPF loops and bpf_loop() helper and is the basis for implementing
ergonomic BPF loops with no statically known or verified bounds.
Subsequent patches implement bpf_for() macro, demonstrating how this can
be wrapped into something that works and feels like a normal for() loop
in C language.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
---
 include/linux/bpf.h            |  8 +++-
 include/uapi/linux/bpf.h       |  8 ++++
 kernel/bpf/bpf_iter.c          | 70 ++++++++++++++++++++++++++++++++++
 kernel/bpf/helpers.c           |  3 ++
 tools/include/uapi/linux/bpf.h |  8 ++++
 5 files changed, 95 insertions(+), 2 deletions(-)
diff mbox series

Patch

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 6792a7940e1e..e64ff1e89fb2 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1617,8 +1617,12 @@  struct bpf_array {
 #define BPF_COMPLEXITY_LIMIT_INSNS      1000000 /* yes. 1M insns */
 #define MAX_TAIL_CALL_CNT 33
 
-/* Maximum number of loops for bpf_loop */
-#define BPF_MAX_LOOPS	BIT(23)
+/* Maximum number of loops for bpf_loop and bpf_iter_num.
+ * It's enum to expose it (and thus make it discoverable) through BTF.
+ */
+enum {
+	BPF_MAX_LOOPS = 8 * 1024 * 1024,
+};
 
 #define BPF_F_ACCESS_MASK	(BPF_F_RDONLY |		\
 				 BPF_F_RDONLY_PROG |	\
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 976b194eb775..4abddb668a10 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -7112,4 +7112,12 @@  enum {
 	BPF_F_TIMER_ABS = (1ULL << 0),
 };
 
+/* BPF numbers iterator state */
+struct bpf_iter_num {
+	/* opaque iterator state; having __u64 here allows to preserve correct
+	 * alignment requirements in vmlinux.h, generated from BTF
+	 */
+	__u64 __opaque[1];
+} __attribute__((aligned(8)));
+
 #endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
index 5dc307bdeaeb..96856f130cbf 100644
--- a/kernel/bpf/bpf_iter.c
+++ b/kernel/bpf/bpf_iter.c
@@ -776,3 +776,73 @@  const struct bpf_func_proto bpf_loop_proto = {
 	.arg3_type	= ARG_PTR_TO_STACK_OR_NULL,
 	.arg4_type	= ARG_ANYTHING,
 };
+
+struct bpf_iter_num_kern {
+	int cur; /* current value, inclusive */
+	int end; /* final value, exclusive */
+} __aligned(8);
+
+__diag_push();
+__diag_ignore_all("-Wmissing-prototypes",
+		  "Global functions as their definitions will be in vmlinux BTF");
+
+__bpf_kfunc int bpf_iter_num_new(struct bpf_iter_num *it, int start, int end)
+{
+	struct bpf_iter_num_kern *s = (void *)it;
+
+	BUILD_BUG_ON(sizeof(struct bpf_iter_num_kern) != sizeof(struct bpf_iter_num));
+	BUILD_BUG_ON(__alignof__(struct bpf_iter_num_kern) != __alignof__(struct bpf_iter_num));
+
+	BTF_TYPE_EMIT(struct btf_iter_num);
+
+	/* start == end is legit, it's an empty range and we'll just get NULL
+	 * on first (and any subsequent) bpf_iter_num_next() call
+	 */
+	if (start > end) {
+		s->cur = s->end = 0;
+		return -EINVAL;
+	}
+
+	/* avoid overflows, e.g., if start == INT_MIN and end == INT_MAX */
+	if ((s64)end - (s64)start > BPF_MAX_LOOPS) {
+		s->cur = s->end = 0;
+		return -E2BIG;
+	}
+
+	/* user will call bpf_iter_num_next() first,
+	 * which will set s->cur to exactly start value;
+	 * underflow shouldn't matter
+	 */
+	s->cur = start - 1;
+	s->end = end;
+
+	return 0;
+}
+
+__bpf_kfunc int *bpf_iter_num_next(struct bpf_iter_num* it)
+{
+	struct bpf_iter_num_kern *s = (void *)it;
+
+	/* check failed initialization or if we are done (same behavior);
+	 * need to be careful about overflow, so convert to s64 for checks,
+	 * e.g., if s->cur == s->end == INT_MAX, we can't just do
+	 * s->cur + 1 >= s->end
+	 */
+	if ((s64)(s->cur + 1) >= s->end) {
+		s->cur = s->end = 0;
+		return NULL;
+	}
+
+	s->cur++;
+
+	return &s->cur;
+}
+
+__bpf_kfunc void bpf_iter_num_destroy(struct bpf_iter_num *it)
+{
+	struct bpf_iter_num_kern *s = (void *)it;
+
+	s->cur = s->end = 0;
+}
+
+__diag_pop();
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 637ac4e92e75..f9b7eeedce08 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -2411,6 +2411,9 @@  BTF_ID_FLAGS(func, bpf_rcu_read_lock)
 BTF_ID_FLAGS(func, bpf_rcu_read_unlock)
 BTF_ID_FLAGS(func, bpf_dynptr_slice, KF_RET_NULL)
 BTF_ID_FLAGS(func, bpf_dynptr_slice_rdwr, KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_iter_num_new, KF_ITER_NEW)
+BTF_ID_FLAGS(func, bpf_iter_num_next, KF_ITER_NEXT | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_iter_num_destroy, KF_ITER_DESTROY)
 BTF_SET8_END(common_btf_ids)
 
 static const struct btf_kfunc_id_set common_kfunc_set = {
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 976b194eb775..4abddb668a10 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -7112,4 +7112,12 @@  enum {
 	BPF_F_TIMER_ABS = (1ULL << 0),
 };
 
+/* BPF numbers iterator state */
+struct bpf_iter_num {
+	/* opaque iterator state; having __u64 here allows to preserve correct
+	 * alignment requirements in vmlinux.h, generated from BTF
+	 */
+	__u64 __opaque[1];
+} __attribute__((aligned(8)));
+
 #endif /* _UAPI__LINUX_BPF_H__ */