diff mbox series

[v2,bpf-next,1/2] bpf: Introduce bpf_can_loop() kfunc

Message ID 20240227055235.23463-2-alexei.starovoitov@gmail.com (mailing list archive)
State Changes Requested
Delegated to: BPF
Headers show
Series bpf: Introduce bpf_can_loop | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for bpf-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit fail Errors and warnings before: 1157 this patch: 1158
netdev/build_tools success Errors and warnings before: 0 this patch: 0
netdev/cc_maintainers warning 13 maintainers not CCed: jolsa@kernel.org john.fastabend@gmail.com nathan@kernel.org yonghong.song@linux.dev song@kernel.org martin.lau@linux.dev sdf@google.com morbo@google.com justinstitt@google.com kpsingh@kernel.org ndesaulniers@google.com llvm@lists.linux.dev haoluo@google.com
netdev/build_clang success Errors and warnings before: 1066 this patch: 1066
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn fail Errors and warnings before: 1174 this patch: 1175
netdev/checkpatch fail CHECK: multiple assignments should be avoided ERROR: do not use assignment in if condition WARNING: line length of 101 exceeds 80 columns WARNING: line length of 81 exceeds 80 columns WARNING: line length of 84 exceeds 80 columns WARNING: line length of 86 exceeds 80 columns WARNING: line length of 91 exceeds 80 columns WARNING: line length of 92 exceeds 80 columns WARNING: line length of 98 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next-PR fail PR summary
bpf/vmtest-bpf-next-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-2 success Logs for Unittests
bpf/vmtest-bpf-next-VM_Test-3 success Logs for Validate matrix.py
bpf/vmtest-bpf-next-VM_Test-5 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-4 success Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-12 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-17 success Logs for s390x-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-18 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-10 success Logs for aarch64-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-11 success Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-20 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-9 success Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-28 success Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-16 success Logs for s390x-gcc / test (test_verifier, false, 360) / test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-19 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-34 success Logs for x86_64-llvm-17 / veristat
bpf/vmtest-bpf-next-VM_Test-35 success Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-42 success Logs for x86_64-llvm-18 / veristat
bpf/vmtest-bpf-next-VM_Test-6 success Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-8 fail Logs for aarch64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-7 fail Logs for aarch64-gcc / test (test_progs, false, 360) / test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-13 success Logs for s390x-gcc / test (test_maps, false, 360) / test_maps on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-15 fail Logs for s390x-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-14 fail Logs for s390x-gcc / test (test_progs, false, 360) / test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-21 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-29 success Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17 and -O2 optimization
bpf/vmtest-bpf-next-VM_Test-22 fail Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-24 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-31 fail Logs for x86_64-llvm-17 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-27 fail Logs for x86_64-gcc / veristat / veristat on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-23 fail Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-32 fail Logs for x86_64-llvm-17 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-30 success Logs for x86_64-llvm-17 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-26 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-38 fail Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-39 fail Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-25 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-41 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-40 fail Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-36 success Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18 and -O2 optimization
bpf/vmtest-bpf-next-VM_Test-33 success Logs for x86_64-llvm-17 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-37 success Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18

Commit Message

Alexei Starovoitov Feb. 27, 2024, 5:52 a.m. UTC
From: Alexei Starovoitov <ast@kernel.org>

While working on bpf_arena the following monster macro had to be
used to iterate a link list:

  for (struct bpf_iter_num ___it __attribute__((aligned(8),
                                                cleanup(bpf_iter_num_destroy))),
              * ___tmp = (bpf_iter_num_new(&___it, 0, (1000000)),
                          pos = list_entry_safe((head)->first,
                                                typeof(*(pos)), member),
                          (void)bpf_iter_num_destroy,
			  (void *)0);
       bpf_iter_num_next(&___it) && pos &&
          ({ ___tmp = (void *)pos->member.next; 1; });
       pos = list_entry_safe((void __arena *)___tmp, typeof(*(pos)), member))

It's similar to bpf_for(), bpf_repeat() macros.
Unfortunately every "for" in normal C code needs an equivalent monster macro.

Instead, let's introduce bpf_can_loop() kfunc that acts on a hidden
bpf_iter_num, so that bpf_iter_num_new(), bpf_iter_num_destroy()
don't need to be called explicitly. It simplifies the macro to:

  for (void * ___tmp = (pos = list_entry_safe((head)->first,
                                              typeof(*(pos)), member),
                        (void *)0);
       bpf_can_loop(0) && pos &&
          ({ ___tmp = (void *)pos->member.next; 1; });
       pos = list_entry_safe((void __arena *)___tmp, typeof(*(pos)), member))

and can be used in any normal "for" or "while" loop, like

  for (i = 0; i < cnt && bpf_can_loop(0); i++) {

The verifier recognizes that bpf_can_loop() is used in the program,
reserves additional 8 bytes of stack, zero initializes them in subprog
prologue, and passes that address to bpf_can_loop() kfunc that simply
increments the counter until it reaches BPF_MAX_LOOPS.

In the future bpf_can_loop() can be inlined to improve performance.
New instruction with the same semantics can be added,
and LLVM will finally be able to emit __builtin_memcpy, __builtin_strcmp.

bpf_can_loop() is not a substitute for bpf_for() when it's used to
iterate normal arrays or map_values.
bpf_can_loop() works well only with arena pointers that don't need
to be bounds-checked on every iteration.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf_verifier.h |   3 +
 kernel/bpf/helpers.c         |  12 +++
 kernel/bpf/verifier.c        | 163 +++++++++++++++++++++++++++++------
 3 files changed, 150 insertions(+), 28 deletions(-)
diff mbox series

Patch

diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 84365e6dd85d..69bc7f2d20f1 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -449,6 +449,7 @@  struct bpf_verifier_state {
 	u32 jmp_history_cnt;
 	u32 dfs_depth;
 	u32 callback_unroll_depth;
+	struct bpf_reg_state can_loop_reg;
 };
 
 #define bpf_get_spilled_reg(slot, frame, mask)				\
@@ -549,6 +550,7 @@  struct bpf_insn_aux_data {
 	bool zext_dst; /* this insn zero extends dst reg */
 	bool storage_get_func_atomic; /* bpf_*_storage_get() with atomic memory alloc */
 	bool is_iter_next; /* bpf_iter_<type>_next() kfunc call */
+	bool is_can_loop; /* bpf_can_loop() kfunc call */
 	bool call_with_percpu_alloc_ptr; /* {this,per}_cpu_ptr() with prog percpu alloc */
 	u8 alu_state; /* used in combination with alu_limit */
 
@@ -619,6 +621,7 @@  struct bpf_subprog_info {
 	u32 start; /* insn idx of function entry point */
 	u32 linfo_idx; /* The idx to the main_prog->aux->linfo */
 	u16 stack_depth; /* max. stack depth used by this function */
+	u16 stack_extra;
 	bool has_tail_call: 1;
 	bool tail_call_reachable: 1;
 	bool has_ld_abs: 1;
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 93edf730d288..d1d93ad8a010 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -2542,6 +2542,17 @@  __bpf_kfunc void bpf_throw(u64 cookie)
 	WARN(1, "A call to BPF exception callback should never return\n");
 }
 
+__bpf_kfunc long bpf_can_loop(void *ptr__ign)
+{
+	u64 *pcnt = ptr__ign, cnt = *pcnt;
+
+	if (cnt < BPF_MAX_LOOPS) {
+		*pcnt = cnt + 1;
+		return cnt + 1;
+	}
+	return 0;
+}
+
 __bpf_kfunc_end_defs();
 
 BTF_KFUNCS_START(generic_btf_ids)
@@ -2618,6 +2629,7 @@  BTF_ID_FLAGS(func, bpf_dynptr_is_null)
 BTF_ID_FLAGS(func, bpf_dynptr_is_rdonly)
 BTF_ID_FLAGS(func, bpf_dynptr_size)
 BTF_ID_FLAGS(func, bpf_dynptr_clone)
+BTF_ID_FLAGS(func, bpf_can_loop)
 BTF_KFUNCS_END(common_btf_ids)
 
 static const struct btf_kfunc_id_set common_kfunc_set = {
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 011d54a1dc53..a94ed21b7770 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -502,6 +502,7 @@  static bool is_dynptr_ref_function(enum bpf_func_id func_id)
 
 static bool is_sync_callback_calling_kfunc(u32 btf_id);
 static bool is_bpf_throw_kfunc(struct bpf_insn *insn);
+static bool is_can_loop_kfunc(struct bpf_kfunc_call_arg_meta *meta);
 
 static bool is_sync_callback_calling_function(enum bpf_func_id func_id)
 {
@@ -1436,6 +1437,7 @@  static int copy_verifier_state(struct bpf_verifier_state *dst_state,
 		if (err)
 			return err;
 	}
+	dst_state->can_loop_reg = src->can_loop_reg;
 	return 0;
 }
 
@@ -7868,6 +7870,34 @@  static int widen_imprecise_scalars(struct bpf_verifier_env *env,
 	return 0;
 }
 
+static bool is_can_loop_insn(struct bpf_verifier_env *env, int insn_idx)
+{
+	return env->insn_aux_data[insn_idx].is_can_loop;
+}
+
+static struct bpf_reg_state *get_iter_reg(struct bpf_verifier_env *env,
+					  struct bpf_verifier_state *st, int insn_idx)
+{
+	struct bpf_func_state *cur_frame;
+	struct bpf_reg_state *iter_reg;
+	int spi;
+
+	if (is_can_loop_insn(env, insn_idx))
+		return &st->can_loop_reg;
+
+	cur_frame = st->frame[st->curframe];
+	/* btf_check_iter_kfuncs() enforces that
+	 * iter state pointer is always the first arg
+	 */
+	iter_reg = &cur_frame->regs[BPF_REG_1];
+	/* current state is valid due to states_equal(),
+	 * so we can assume valid iter and reg state,
+	 * no need for extra (re-)validations
+	 */
+	spi = __get_spi(iter_reg->off + iter_reg->var_off.value);
+	return &func(env, iter_reg)->stack[spi].spilled_ptr;
+}
+
 /* process_iter_next_call() is called when verifier gets to iterator's next
  * "method" (e.g., bpf_iter_num_next() for numbers iterator) call. We'll refer
  * to it as just "iter_next()" in comments below.
@@ -7946,18 +7976,15 @@  static int widen_imprecise_scalars(struct bpf_verifier_env *env,
  *     }
  *     bpf_iter_num_destroy(&it);
  */
-static int process_iter_next_call(struct bpf_verifier_env *env, int insn_idx,
-				  struct bpf_kfunc_call_arg_meta *meta)
+static int process_iter_next_call(struct bpf_verifier_env *env, int insn_idx)
 {
 	struct bpf_verifier_state *cur_st = env->cur_state, *queued_st, *prev_st;
 	struct bpf_func_state *cur_fr = cur_st->frame[cur_st->curframe], *queued_fr;
 	struct bpf_reg_state *cur_iter, *queued_iter;
-	int iter_frameno = meta->iter.frameno;
-	int iter_spi = meta->iter.spi;
 
 	BTF_TYPE_EMIT(struct bpf_iter);
 
-	cur_iter = &env->cur_state->frame[iter_frameno]->stack[iter_spi].spilled_ptr;
+	cur_iter = get_iter_reg(env, cur_st, insn_idx);
 
 	if (cur_iter->iter.state != BPF_ITER_STATE_ACTIVE &&
 	    cur_iter->iter.state != BPF_ITER_STATE_DRAINED) {
@@ -7985,7 +8012,7 @@  static int process_iter_next_call(struct bpf_verifier_env *env, int insn_idx,
 		if (!queued_st)
 			return -ENOMEM;
 
-		queued_iter = &queued_st->frame[iter_frameno]->stack[iter_spi].spilled_ptr;
+		queued_iter = get_iter_reg(env, queued_st, insn_idx);
 		queued_iter->iter.state = BPF_ITER_STATE_ACTIVE;
 		queued_iter->iter.depth++;
 		if (prev_st)
@@ -10925,6 +10952,7 @@  enum special_kfunc_type {
 	KF_bpf_percpu_obj_new_impl,
 	KF_bpf_percpu_obj_drop_impl,
 	KF_bpf_throw,
+	KF_bpf_can_loop,
 	KF_bpf_iter_css_task_new,
 };
 
@@ -10949,6 +10977,7 @@  BTF_ID(func, bpf_dynptr_clone)
 BTF_ID(func, bpf_percpu_obj_new_impl)
 BTF_ID(func, bpf_percpu_obj_drop_impl)
 BTF_ID(func, bpf_throw)
+BTF_ID(func, bpf_can_loop)
 #ifdef CONFIG_CGROUPS
 BTF_ID(func, bpf_iter_css_task_new)
 #endif
@@ -10977,6 +11006,7 @@  BTF_ID(func, bpf_dynptr_clone)
 BTF_ID(func, bpf_percpu_obj_new_impl)
 BTF_ID(func, bpf_percpu_obj_drop_impl)
 BTF_ID(func, bpf_throw)
+BTF_ID(func, bpf_can_loop)
 #ifdef CONFIG_CGROUPS
 BTF_ID(func, bpf_iter_css_task_new)
 #else
@@ -11003,6 +11033,11 @@  static bool is_kfunc_bpf_rcu_read_unlock(struct bpf_kfunc_call_arg_meta *meta)
 	return meta->func_id == special_kfunc_list[KF_bpf_rcu_read_unlock];
 }
 
+static bool is_can_loop_kfunc(struct bpf_kfunc_call_arg_meta *meta)
+{
+	return meta->func_id == special_kfunc_list[KF_bpf_can_loop];
+}
+
 static enum kfunc_ptr_arg_type
 get_kfunc_ptr_arg_type(struct bpf_verifier_env *env,
 		       struct bpf_kfunc_call_arg_meta *meta,
@@ -12049,6 +12084,7 @@  static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 	insn_aux = &env->insn_aux_data[insn_idx];
 
 	insn_aux->is_iter_next = is_iter_next_kfunc(&meta);
+	insn_aux->is_can_loop = is_can_loop_kfunc(&meta);
 
 	if (is_kfunc_destructive(&meta) && !capable(CAP_SYS_BOOT)) {
 		verbose(env, "destructive kfunc calls require CAP_SYS_BOOT capability\n");
@@ -12424,8 +12460,8 @@  static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 			mark_btf_func_reg_size(env, regno, t->size);
 	}
 
-	if (is_iter_next_kfunc(&meta)) {
-		err = process_iter_next_call(env, insn_idx, &meta);
+	if (is_iter_next_kfunc(&meta) || is_can_loop_kfunc(&meta)) {
+		err = process_iter_next_call(env, insn_idx);
 		if (err)
 			return err;
 	}
@@ -15609,7 +15645,7 @@  static int visit_insn(int t, struct bpf_verifier_env *env)
 			struct bpf_kfunc_call_arg_meta meta;
 
 			ret = fetch_kfunc_meta(env, insn, &meta, NULL);
-			if (ret == 0 && is_iter_next_kfunc(&meta)) {
+			if (ret == 0 && (is_iter_next_kfunc(&meta) || is_can_loop_kfunc(&meta))) {
 				mark_prune_point(env, t);
 				/* Checking and saving state checkpoints at iter_next() call
 				 * is crucial for fast convergence of open-coded iterator loop
@@ -16759,6 +16795,9 @@  static bool states_equal(struct bpf_verifier_env *env,
 	if (old->active_rcu_lock != cur->active_rcu_lock)
 		return false;
 
+	if (old->can_loop_reg.iter.state != cur->can_loop_reg.iter.state)
+		return false;
+
 	/* for states to be equal callsites have to be the same
 	 * and all frame states need to be equivalent
 	 */
@@ -16997,6 +17036,9 @@  static bool iter_active_depths_differ(struct bpf_verifier_state *old, struct bpf
 	struct bpf_func_state *state;
 	int i, fr;
 
+	if (old->can_loop_reg.iter.depth != cur->can_loop_reg.iter.depth)
+		return true;
+
 	for (fr = old->curframe; fr >= 0; fr--) {
 		state = old->frame[fr];
 		for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
@@ -17101,23 +17143,11 @@  static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
 			 * comparison would discard current state with r7=-32
 			 * => unsafe memory access at 11 would not be caught.
 			 */
-			if (is_iter_next_insn(env, insn_idx)) {
+			if (is_iter_next_insn(env, insn_idx) || is_can_loop_insn(env, insn_idx)) {
 				if (states_equal(env, &sl->state, cur, true)) {
-					struct bpf_func_state *cur_frame;
-					struct bpf_reg_state *iter_state, *iter_reg;
-					int spi;
+					struct bpf_reg_state *iter_state;
 
-					cur_frame = cur->frame[cur->curframe];
-					/* btf_check_iter_kfuncs() enforces that
-					 * iter state pointer is always the first arg
-					 */
-					iter_reg = &cur_frame->regs[BPF_REG_1];
-					/* current state is valid due to states_equal(),
-					 * so we can assume valid iter and reg state,
-					 * no need for extra (re-)validations
-					 */
-					spi = __get_spi(iter_reg->off + iter_reg->var_off.value);
-					iter_state = &func(env, iter_reg)->stack[spi].spilled_ptr;
+					iter_state = get_iter_reg(env, cur, insn_idx);
 					if (iter_state->iter.state == BPF_ITER_STATE_ACTIVE) {
 						update_loop_entry(cur, &sl->state);
 						goto hit;
@@ -19258,7 +19288,8 @@  static void __fixup_collection_insert_kfunc(struct bpf_insn_aux_data *insn_aux,
 }
 
 static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
-			    struct bpf_insn *insn_buf, int insn_idx, int *cnt)
+			    struct bpf_insn *insn_buf, int insn_idx, int stack_base,
+			    int *cnt, int *stack_extra)
 {
 	const struct bpf_kfunc_desc *desc;
 
@@ -19349,6 +19380,12 @@  static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
 		   desc->func_id == special_kfunc_list[KF_bpf_rdonly_cast]) {
 		insn_buf[0] = BPF_MOV64_REG(BPF_REG_0, BPF_REG_1);
 		*cnt = 1;
+	} else if (desc->func_id == special_kfunc_list[KF_bpf_can_loop]) {
+		insn_buf[0] = BPF_MOV64_REG(BPF_REG_1, BPF_REG_FP);
+		insn_buf[1] = BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, stack_base - 8);
+		insn_buf[2] = *insn;
+		*cnt = 3;
+		*stack_extra = 8;
 	}
 	return 0;
 }
@@ -19396,7 +19433,10 @@  static int do_misc_fixups(struct bpf_verifier_env *env)
 	struct bpf_insn insn_buf[16];
 	struct bpf_prog *new_prog;
 	struct bpf_map *map_ptr;
-	int i, ret, cnt, delta = 0;
+	int i, ret, cnt, delta = 0, cur_subprog = 0;
+	struct bpf_subprog_info *subprogs = env->subprog_info;
+	u16 stack_depth = subprogs[cur_subprog].stack_depth;
+	u16 stack_depth_extra = 0;
 
 	if (env->seen_exception && !env->exception_callback_subprog) {
 		struct bpf_insn patch[] = {
@@ -19416,7 +19456,16 @@  static int do_misc_fixups(struct bpf_verifier_env *env)
 		mark_subprog_exc_cb(env, env->exception_callback_subprog);
 	}
 
-	for (i = 0; i < insn_cnt; i++, insn++) {
+	for (i = 0; i < insn_cnt;
+	     ({
+		if (subprogs[cur_subprog + 1].start == i + delta + 1) {
+			subprogs[cur_subprog].stack_depth += stack_depth_extra;
+			subprogs[cur_subprog].stack_extra = stack_depth_extra;
+			cur_subprog++;
+			stack_depth = subprogs[cur_subprog].stack_depth;
+			stack_depth_extra = 0;
+		}
+	      }), i++, insn++) {
 		/* Make divide-by-zero exceptions impossible. */
 		if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) ||
 		    insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) ||
@@ -19536,11 +19585,18 @@  static int do_misc_fixups(struct bpf_verifier_env *env)
 		if (insn->src_reg == BPF_PSEUDO_CALL)
 			continue;
 		if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
-			ret = fixup_kfunc_call(env, insn, insn_buf, i + delta, &cnt);
+			int stack_extra = 0;
+
+			ret = fixup_kfunc_call(env, insn, insn_buf, i + delta,
+					       -stack_depth, &cnt, &stack_extra);
 			if (ret)
 				return ret;
 			if (cnt == 0)
 				continue;
+			if (stack_extra & (BPF_REG_SIZE - 1)) {
+				verbose(env, "verifier bug: kfunc stack extra must be power of 8\n");
+				return -EFAULT;
+			}
 
 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
 			if (!new_prog)
@@ -19549,6 +19605,17 @@  static int do_misc_fixups(struct bpf_verifier_env *env)
 			delta	 += cnt - 1;
 			env->prog = prog = new_prog;
 			insn	  = new_prog->insnsi + i + delta;
+			if (stack_extra) {
+				/* multiple calls to bpf_can_loop() from one subprog
+				 * share the same stack slot.
+				 * Only bpf_can_loop kfunc can request extra stack for now.
+				 */
+				if (stack_depth_extra && stack_depth_extra != stack_extra) {
+					verbose(env, "verifier bug: stack_extra\n");
+					return -EFAULT;
+				}
+				stack_depth_extra = stack_extra;
+			}
 			continue;
 		}
 
@@ -19942,6 +20009,30 @@  static int do_misc_fixups(struct bpf_verifier_env *env)
 		insn->imm = fn->func - __bpf_call_base;
 	}
 
+	env->prog->aux->stack_depth = subprogs[0].stack_depth;
+	for (i = 0; i < env->subprog_cnt; i++) {
+		int subprog_start = subprogs[i].start, j;
+		int stack_slots = subprogs[i].stack_extra / 8;
+
+		if (stack_slots >= ARRAY_SIZE(insn_buf)) {
+			verbose(env, "verifier bug: stack_extra is too large\n");
+			return -EFAULT;
+		}
+
+		/* Add insns to subprog prologue to zero init extra stack */
+		for (j = 0; j < stack_slots; j++)
+			insn_buf[j] = BPF_ST_MEM(BPF_DW, BPF_REG_FP,
+						 -subprogs[i].stack_depth + j * 8, 0);
+		if (j) {
+			insn_buf[j] = env->prog->insnsi[subprog_start];
+
+			new_prog = bpf_patch_insn_data(env, subprog_start, insn_buf, j + 1);
+			if (!new_prog)
+				return -ENOMEM;
+			env->prog = prog = new_prog;
+		}
+	}
+
 	/* Since poke tab is now finalized, publish aux to tracker. */
 	for (i = 0; i < prog->aux->size_poke_tab; i++) {
 		map_ptr = prog->aux->poke_tab[i].tail_call.map;
@@ -20130,6 +20221,21 @@  static void free_states(struct bpf_verifier_env *env)
 	}
 }
 
+static void init_can_loop_reg(struct bpf_reg_state *st)
+{
+	__mark_reg_known_zero(st);
+	st->type = PTR_TO_STACK;
+	st->live |= REG_LIVE_WRITTEN;
+	st->ref_obj_id = 0;
+	st->iter.btf = NULL;
+	st->iter.btf_id = 0;
+	/* Init register state to sane values.
+	 * Only iter.state and iter.depth are used during verification.
+	 */
+	st->iter.state = BPF_ITER_STATE_ACTIVE;
+	st->iter.depth = 0;
+}
+
 static int do_check_common(struct bpf_verifier_env *env, int subprog)
 {
 	bool pop_log = !(env->log.level & BPF_LOG_LEVEL2);
@@ -20147,6 +20253,7 @@  static int do_check_common(struct bpf_verifier_env *env, int subprog)
 	state->curframe = 0;
 	state->speculative = false;
 	state->branches = 1;
+	init_can_loop_reg(&state->can_loop_reg);
 	state->frame[0] = kzalloc(sizeof(struct bpf_func_state), GFP_KERNEL);
 	if (!state->frame[0]) {
 		kfree(state);