From patchwork Thu Sep 26 23:45:11 2024
X-Patchwork-Submitter: Yonghong Song
X-Patchwork-Id: 13813773
X-Patchwork-Delegate: bpf@iogearbox.net
From: Yonghong Song
To: bpf@vger.kernel.org
Cc: Alexei Starovoitov, Andrii Nakryiko, Daniel Borkmann,
    kernel-team@fb.com, Martin KaFai Lau
Subject: [PATCH bpf-next v3 1/5] bpf: Allow each subprog having stack size of 512 bytes
Date: Thu, 26 Sep 2024 16:45:11 -0700
Message-ID: <20240926234511.1769453-1-yonghong.song@linux.dev>
In-Reply-To: <20240926234506.1769256-1-yonghong.song@linux.dev>
References: <20240926234506.1769256-1-yonghong.song@linux.dev>

With private stack support, each subprog can have a stack of up to 512
bytes. The 512-byte per-subprog limit is kept to avoid increasing
verifier complexity: allowing more than 512 bytes would require a big
verifier change and would increase memory consumption and verification
time.
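As a quick illustration (not part of the patch; names and sizes below are
made up), a program like the following keeps every subprog within the
512-byte per-subprog limit, while its combined call-chain depth of roughly
800 bytes would have failed the old MAX_BPF_STACK check and only has to
stay below U16_MAX once the private stack is enabled:

    #include <bpf/bpf_helpers.h>

    /* Hypothetical sketch: each frame uses about 400 bytes, so each
     * subprog passes the 512-byte per-subprog check; the combined
     * depth is acceptable only under the relaxed private-stack limit.
     */
    static __noinline int use_stack(char *p)
    {
            char buf[400] = {};

            buf[0] = p[0];
            return buf[0];
    }

    SEC("kprobe")
    int main_prog(void *ctx)
    {
            char buf[400] = {};

            return use_stack(buf);
    }

    char _license[] SEC("license") = "GPL";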
Signed-off-by: Yonghong Song
---
 include/linux/bpf.h    |  1 +
 include/linux/filter.h |  1 +
 kernel/bpf/core.c      |  5 +++++
 kernel/bpf/verifier.c  | 49 +++++++++++++++++++++++++++++++++++++-----
 4 files changed, 51 insertions(+), 5 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 19d8ca8ac960..62909fbe9e48 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1483,6 +1483,7 @@ struct bpf_prog_aux {
 	bool xdp_has_frags;
 	bool exception_cb;
 	bool exception_boundary;
+	bool pstack_enabled;
 	struct bpf_arena *arena;
 	/* BTF_KIND_FUNC_PROTO for valid attach_btf_id */
 	const struct btf_type *attach_func_proto;
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 7d7578a8eac1..3a21947f2fd4 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -1119,6 +1119,7 @@ bool bpf_jit_supports_exceptions(void);
 bool bpf_jit_supports_ptr_xchg(void);
 bool bpf_jit_supports_arena(void);
 bool bpf_jit_supports_insn(struct bpf_insn *insn, bool in_arena);
+bool bpf_jit_supports_private_stack(void);
 u64 bpf_arch_uaddress_limit(void);
 void arch_bpf_stack_walk(bool (*consume_fn)(void *cookie, u64 ip, u64 sp, u64 bp), void *cookie);
 bool bpf_helper_changes_pkt_data(void *func);
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 4e07cc057d6f..0727fff6de0e 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -3044,6 +3044,11 @@ bool __weak bpf_jit_supports_exceptions(void)
 	return false;
 }
 
+bool __weak bpf_jit_supports_private_stack(void)
+{
+	return false;
+}
+
 void __weak arch_bpf_stack_walk(bool (*consume_fn)(void *cookie, u64 ip, u64 sp, u64 bp), void *cookie)
 {
 }
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 9a7ed527e47e..97700e32e085 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -5999,7 +5999,8 @@ static int round_up_stack_depth(struct bpf_verifier_env *env, int stack_depth)
  * Since recursion is prevented by check_cfg() this algorithm
  * only needs a local stack of MAX_CALL_FRAMES to remember callsites
  */
-static int check_max_stack_depth_subprog(struct bpf_verifier_env *env, int idx)
+static int check_max_stack_depth_subprog(struct bpf_verifier_env *env, int idx,
+					 bool pstack_enabled)
 {
 	struct bpf_subprog_info *subprog = env->subprog_info;
 	struct bpf_insn *insn = env->prog->insnsi;
@@ -6007,8 +6008,9 @@ static int check_max_stack_depth_subprog(struct bpf_verifier_env *env, int idx)
 	bool tail_call_reachable = false;
 	int ret_insn[MAX_CALL_FRAMES];
 	int ret_prog[MAX_CALL_FRAMES];
-	int j;
+	int j, subprog_stack_depth, stack_limit;
 
+	stack_limit = pstack_enabled ? U16_MAX : MAX_BPF_STACK;
 	i = subprog[idx].start;
 process_func:
 	/* protect against potential stack overflow that might happen when
@@ -6036,12 +6038,18 @@ static int check_max_stack_depth_subprog(struct bpf_verifier_env *env, int idx)
 			depth);
 		return -EACCES;
 	}
-	depth += round_up_stack_depth(env, subprog[idx].stack_depth);
-	if (depth > MAX_BPF_STACK) {
+	subprog_stack_depth = round_up_stack_depth(env, subprog[idx].stack_depth);
+	depth += subprog_stack_depth;
+	if (depth > stack_limit) {
 		verbose(env, "combined stack size of %d calls is %d. Too large\n",
 			frame + 1, depth);
 		return -EACCES;
 	}
+	if (pstack_enabled && subprog_stack_depth > MAX_BPF_STACK) {
Too large\n", + idx, subprog_stack_depth); + return -EACCES; + } continue_func: subprog_end = subprog[idx + 1].start; for (; i < subprog_end; i++) { @@ -6137,14 +6145,45 @@ static int check_max_stack_depth_subprog(struct bpf_verifier_env *env, int idx) goto continue_func; } +static bool bpf_enable_private_stack(struct bpf_prog *prog) +{ + if (!bpf_jit_supports_private_stack()) + return false; + + switch (prog->aux->prog->type) { + case BPF_PROG_TYPE_KPROBE: + case BPF_PROG_TYPE_TRACEPOINT: + case BPF_PROG_TYPE_PERF_EVENT: + case BPF_PROG_TYPE_RAW_TRACEPOINT: + return true; + case BPF_PROG_TYPE_TRACING: + if (prog->expected_attach_type != BPF_TRACE_ITER) + return true; + fallthrough; + default: + return false; + } +} + static int check_max_stack_depth(struct bpf_verifier_env *env) { + bool has_tail_call = false, pstack_enabled = false; struct bpf_subprog_info *si = env->subprog_info; int ret; + for (int i = 0; i < env->subprog_cnt; i++) { + if (si[i].has_tail_call) { + has_tail_call = true; + break; + } + } + + if (!has_tail_call && bpf_enable_private_stack(env->prog)) + env->prog->aux->pstack_enabled = pstack_enabled = true; + for (int i = 0; i < env->subprog_cnt; i++) { if (!i || si[i].is_async_cb) { - ret = check_max_stack_depth_subprog(env, i); + ret = check_max_stack_depth_subprog(env, i, pstack_enabled); if (ret < 0) return ret; } From patchwork Thu Sep 26 23:45:16 2024 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Yonghong Song X-Patchwork-Id: 13813776 X-Patchwork-Delegate: bpf@iogearbox.net Received: from 66-220-155-178.mail-mxout.facebook.com (66-220-155-178.mail-mxout.facebook.com [66.220.155.178]) (using TLSv1.2 with cipher ECDHE-RSA-AES128-GCM-SHA256 (128/128 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 82B11188CB7 for ; Thu, 26 Sep 2024 23:45:28 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=66.220.155.178 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1727394330; cv=none; b=SBpr7GQO7IaTade/n7Ulx4zWzNk9eAT0DeKfT0r1/q1MxX8gjJn5ZbCn80B3GTzURMEKgRAehb6xC8pAvkyD+TzKoUFnx1MXZWpGvuoZKndfvWy7mlfqhwNK7DzFaweV3c/hlDtY96McXOWZUGMgdlLwH8qEBNimIyDTVa+o7ZY= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1727394330; c=relaxed/simple; bh=+AgiCmwseQJNQiVXVJD/fpibq+iyNAvfGv9ebRpy/Fk=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=Ik4le82lFhivBRyrNFmsdXAfI3wfJnnNkH1vavZ3x5GjEK222tRdFu2p6KGiQJFIVkyy1aaeQVR4KDIxIIKTWS5FxITEiLCIYj+8MJ2BATkV3Gi8+Nl5GB/keDUKh48x3aenHuaRTqoFDuhC7Wp9RT5AYzUok18pji8cPfN4l08= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=fail (p=none dis=none) header.from=linux.dev; spf=fail smtp.mailfrom=linux.dev; arc=none smtp.client-ip=66.220.155.178 Authentication-Results: smtp.subspace.kernel.org; dmarc=fail (p=none dis=none) header.from=linux.dev Authentication-Results: smtp.subspace.kernel.org; spf=fail smtp.mailfrom=linux.dev Received: by devbig309.ftw3.facebook.com (Postfix, from userid 128203) id 40BE1967C768; Thu, 26 Sep 2024 16:45:16 -0700 (PDT) From: Yonghong Song To: bpf@vger.kernel.org Cc: Alexei Starovoitov , Andrii Nakryiko , Daniel Borkmann , kernel-team@fb.com, Martin KaFai Lau Subject: [PATCH bpf-next v3 2/5] bpf: Collect stack depth information Date: Thu, 26 Sep 2024 16:45:16 -0700 Message-ID: <20240926234516.1770154-1-yonghong.song@linux.dev> X-Mailer: git-send-email 2.43.5 In-Reply-To: 
Signed-off-by: Yonghong Song
---
 include/linux/bpf.h          |  3 +-
 include/linux/bpf_verifier.h |  3 ++
 kernel/bpf/verifier.c        | 81 ++++++++++++++++++++++++++++++++++++
 3 files changed, 86 insertions(+), 1 deletion(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 62909fbe9e48..156b9516d9f6 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1566,7 +1566,8 @@ struct bpf_prog {
 				call_get_stack:1, /* Do we call bpf_get_stack() or bpf_get_stackid() */
 				call_get_func_ip:1, /* Do we call get_func_ip() */
 				tstamp_type_access:1, /* Accessed __sk_buff->tstamp_type */
-				sleepable:1;	/* BPF program is sleepable */
+				sleepable:1,	/* BPF program is sleepable */
+				pstack_eligible:1; /* Candidate for private stacks */
 	enum bpf_prog_type	type;		/* Type of BPF program */
 	enum bpf_attach_type	expected_attach_type; /* For some prog types */
 	u32			len;		/* Number of filter blocks */
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 4513372c5bc8..63df10f4129e 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -659,6 +659,8 @@ struct bpf_subprog_info {
 	 * are used for bpf_fastcall spills and fills.
	 */
 	s16 fastcall_stack_off;
+	u16 subtree_stack_depth;
+	u16 subtree_top_idx;
 	bool has_tail_call: 1;
 	bool tail_call_reachable: 1;
 	bool has_ld_abs: 1;
@@ -668,6 +670,7 @@ struct bpf_subprog_info {
 	bool args_cached: 1;
 	/* true if bpf_fastcall stack region is used by functions that can't be inlined */
 	bool keep_fastcall_stack: 1;
+	bool pstack_eligible:1;
 
 	u8 arg_cnt;
 	struct bpf_subprog_arg_info args[MAX_BPF_FUNC_REG_ARGS];
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 97700e32e085..69e17cb22037 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -194,6 +194,8 @@ struct bpf_verifier_stack_elem {
 
 #define BPF_GLOBAL_PERCPU_MA_MAX_SIZE  512
 
+#define BPF_PSTACK_MIN_SUBTREE_SIZE  128
+
 static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx);
 static int release_reference(struct bpf_verifier_env *env, int ref_obj_id);
 static void invalidate_non_owning_refs(struct bpf_verifier_env *env);
@@ -6192,6 +6194,82 @@ static int check_max_stack_depth(struct bpf_verifier_env *env)
 	return 0;
 }
 
+static int calc_private_stack_alloc_subprog(struct bpf_verifier_env *env, int idx)
+{
+	struct bpf_subprog_info *subprog = env->subprog_info;
+	struct bpf_insn *insn = env->prog->insnsi;
+	int depth = 0, frame = 0, i, subprog_end;
+	int ret_insn[MAX_CALL_FRAMES];
+	int ret_prog[MAX_CALL_FRAMES];
+	int ps_eligible = 0;
+	int orig_idx = idx;
+
+	subprog[idx].subtree_top_idx = idx;
+	i = subprog[idx].start;
+
+process_func:
+	depth += round_up_stack_depth(env, subprog[idx].stack_depth);
+	if (depth > U16_MAX)
+		return -EACCES;
+
+	if (!ps_eligible && depth >= BPF_PSTACK_MIN_SUBTREE_SIZE) {
+		subprog[orig_idx].pstack_eligible = true;
+		ps_eligible = true;
+	}
+	subprog[orig_idx].subtree_stack_depth =
+		max_t(u16, subprog[orig_idx].subtree_stack_depth, depth);
+
+continue_func:
+	subprog_end = subprog[idx + 1].start;
+	for (; i < subprog_end; i++) {
+		int next_insn, sidx;
+
+		if (!bpf_pseudo_call(insn + i) && !bpf_pseudo_func(insn + i))
+			continue;
+		/* remember insn and function to return to */
+		ret_insn[frame] = i + 1;
+		ret_prog[frame] = idx;
+
+		/* find the callee */
+		next_insn = i + insn[i].imm + 1;
+		sidx = find_subprog(env, next_insn);
+		if (subprog[sidx].is_cb) {
+			if (!bpf_pseudo_call(insn + i))
+				continue;
+		}
+		i = next_insn;
+		idx = sidx;
+		subprog[idx].subtree_top_idx = orig_idx;
+
+		frame++;
+		goto process_func;
+	}
+	if (frame == 0)
+		return ps_eligible;
+	depth -= round_up_stack_depth(env, subprog[idx].stack_depth);
+	frame--;
+	i = ret_insn[frame];
+	idx = ret_prog[frame];
+	goto continue_func;
+}
+
+static int calc_private_stack_alloc_size(struct bpf_verifier_env *env)
+{
+	struct bpf_subprog_info *si = env->subprog_info;
+	int ret;
+
+	for (int i = 0; i < env->subprog_cnt; i++) {
+		if (!i || si[i].is_cb) {
+			ret = calc_private_stack_alloc_subprog(env, i);
+			if (ret < 0)
+				return ret;
+			if (ret)
+				env->prog->pstack_eligible = true;
+		}
+	}
+	return 0;
+}
+
 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
 static int get_callee_stack_depth(struct bpf_verifier_env *env,
 				  const struct bpf_insn *insn, int idx)
@@ -22502,6 +22580,9 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u32 uattr_size)
 			: false;
 	}
 
+	if (ret == 0 && env->prog->aux->pstack_enabled)
+		ret = calc_private_stack_alloc_size(env);
+
 	if (ret == 0)
 		ret = fixup_call_args(env);

From patchwork Thu Sep 26 23:45:21 2024
X-Patchwork-Submitter: Yonghong Song
X-Patchwork-Id: 13813775
X-Patchwork-Delegate: bpf@iogearbox.net
From: Yonghong Song
To: bpf@vger.kernel.org
Cc: Alexei Starovoitov, Andrii Nakryiko, Daniel Borkmann,
    kernel-team@fb.com, Martin KaFai Lau
Subject: [PATCH bpf-next v3 3/5] bpf: Mark each subprog with proper pstack states
Date: Thu, 26 Sep 2024 16:45:21 -0700
Message-ID: <20240926234521.1770481-1-yonghong.song@linux.dev>
In-Reply-To: <20240926234506.1769256-1-yonghong.song@linux.dev>
References: <20240926234506.1769256-1-yonghong.song@linux.dev>

Three private stack states are used to direct jit action:

  PSTACK_TREE_NO:       do not use private stack
  PSTACK_TREE_INTERNAL: adjust the frame pointer address (similar to
                        normal stack)
  PSTACK_TREE_ROOT:     set the frame pointer

Note that for a subtree root, even if the root bpf_prog stack size is
0, PSTACK_TREE_INTERNAL is still used. This is needed for bpf exception
handling. More details can be found in the subsequent jit support and
selftest patches.
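Read as a decision table, the state assignment in the jit_subprogs()
change below boils down to something like the following sketch (pick_state
and its arguments are illustrative, not kernel API):

    enum bpf_pstack_state {
            PSTACK_TREE_NO,        /* do not use private stack */
            PSTACK_TREE_INTERNAL,  /* adjust frame pointer (r9 += own depth) */
            PSTACK_TREE_ROOT,      /* load per-cpu private frame pointer */
    };

    /* is_root: this subprog is the top of its subtree;
     * root_depth: the subtree root's recorded subtree_stack_depth.
     */
    static enum bpf_pstack_state pick_state(int is_root, int root_depth)
    {
            if (is_root)
                    /* a root with zero depth still gets INTERNAL,
                     * for exception handling
                     */
                    return root_depth ? PSTACK_TREE_ROOT
                                      : PSTACK_TREE_INTERNAL;
            return root_depth ? PSTACK_TREE_INTERNAL : PSTACK_TREE_NO;
    }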
Signed-off-by: Yonghong Song
---
 include/linux/bpf.h   |  9 +++++++++
 kernel/bpf/core.c     | 19 +++++++++++++++++++
 kernel/bpf/verifier.c | 30 ++++++++++++++++++++++++++++++
 3 files changed, 58 insertions(+)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 156b9516d9f6..8f02d11bd408 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1550,6 +1550,12 @@ struct bpf_prog_aux {
 	};
 };
 
+enum bpf_pstack_state {
+	PSTACK_TREE_NO,
+	PSTACK_TREE_INTERNAL,
+	PSTACK_TREE_ROOT,
+};
+
 struct bpf_prog {
 	u16			pages;		/* Number of allocated pages */
 	u16			jited:1,	/* Is our filter JIT'ed? */
@@ -1570,15 +1576,18 @@ struct bpf_prog {
 				pstack_eligible:1; /* Candidate for private stacks */
 	enum bpf_prog_type	type;		/* Type of BPF program */
 	enum bpf_attach_type	expected_attach_type; /* For some prog types */
+	enum bpf_pstack_state	pstack:2;	/* Private stack state */
 	u32			len;		/* Number of filter blocks */
 	u32			jited_len;	/* Size of jited insns in bytes */
 	u8			tag[BPF_TAG_SIZE];
+	u16			subtree_stack_depth; /* Subtree stack depth if PSTACK_TREE_ROOT prog, 0 otherwise */
 	struct bpf_prog_stats __percpu *stats;
 	int __percpu		*active;
 	unsigned int		(*bpf_func)(const void *ctx,
 					    const struct bpf_insn *insn);
 	struct bpf_prog_aux	*aux;		/* Auxiliary fields */
 	struct sock_fprog_kern	*orig_prog;	/* Original BPF program */
+	void __percpu		*private_stack_ptr;
 	/* Instructions for interpreter */
 	union {
 		DECLARE_FLEX_ARRAY(struct sock_filter, insns);
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 0727fff6de0e..d6eb052f6631 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1239,6 +1239,7 @@ void __weak bpf_jit_free(struct bpf_prog *fp)
 		struct bpf_binary_header *hdr = bpf_jit_binary_hdr(fp);
 
 		bpf_jit_binary_free(hdr);
+		free_percpu(fp->private_stack_ptr);
 		WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(fp));
 	}
@@ -2420,6 +2421,24 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
 	if (*err)
 		return fp;
 
+	if (fp->pstack_eligible) {
+		if (!fp->aux->stack_depth) {
+			fp->pstack = PSTACK_TREE_NO;
+		} else {
+			void __percpu *private_stack_ptr;
+
+			fp->pstack = PSTACK_TREE_ROOT;
+			private_stack_ptr =
+				__alloc_percpu_gfp(fp->aux->stack_depth, 8, GFP_KERNEL);
+			if (!private_stack_ptr) {
+				*err = -ENOMEM;
+				return fp;
+			}
+			fp->subtree_stack_depth = fp->aux->stack_depth;
+			fp->private_stack_ptr = private_stack_ptr;
+		}
+	}
+
 	fp = bpf_int_jit_compile(fp);
 	bpf_prog_jit_attempt_done(fp);
 	if (!fp->jited && jit_needed) {
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 69e17cb22037..9d093e2013ca 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -20060,6 +20060,7 @@ static int jit_subprogs(struct bpf_verifier_env *env)
 {
 	struct bpf_prog *prog = env->prog, **func, *tmp;
 	int i, j, subprog_start, subprog_end = 0, len, subprog;
+	int subtree_top_idx, subtree_stack_depth;
 	struct bpf_map *map_ptr;
 	struct bpf_insn *insn;
 	void *old_bpf_func;
@@ -20138,6 +20139,35 @@ static int jit_subprogs(struct bpf_verifier_env *env)
 		func[i]->is_func = 1;
 		func[i]->sleepable = prog->sleepable;
 		func[i]->aux->func_idx = i;
+
+		subtree_top_idx = env->subprog_info[i].subtree_top_idx;
+		if (env->subprog_info[subtree_top_idx].pstack_eligible) {
+			if (subtree_top_idx == i)
+				func[i]->subtree_stack_depth =
+					env->subprog_info[i].subtree_stack_depth;
+
+			subtree_stack_depth = func[i]->subtree_stack_depth;
+			if (subtree_top_idx != i) {
+				if (env->subprog_info[subtree_top_idx].subtree_stack_depth)
+					func[i]->pstack = PSTACK_TREE_INTERNAL;
+				else
+					func[i]->pstack = PSTACK_TREE_NO;
+			} else if (!subtree_stack_depth) {
+				func[i]->pstack = PSTACK_TREE_INTERNAL;
+			} else {
+				void __percpu *private_stack_ptr;
+
+				func[i]->pstack = PSTACK_TREE_ROOT;
+				private_stack_ptr =
+					__alloc_percpu_gfp(subtree_stack_depth, 8, GFP_KERNEL);
+				if (!private_stack_ptr) {
+					err = -ENOMEM;
+					goto out_free;
+				}
+				func[i]->private_stack_ptr = private_stack_ptr;
+			}
+		}
+
 		/* Below members will be freed only at prog->aux */
 		func[i]->aux->btf = prog->aux->btf;
 		func[i]->aux->func_info = prog->aux->func_info;

From patchwork Thu Sep 26 23:45:26 2024
X-Patchwork-Submitter: Yonghong Song
X-Patchwork-Id: 13813777
X-Patchwork-Delegate: bpf@iogearbox.net
From: Yonghong Song
To: bpf@vger.kernel.org
Cc: Alexei Starovoitov, Andrii Nakryiko, Daniel Borkmann,
    kernel-team@fb.com, Martin KaFai Lau
Subject: [PATCH bpf-next v3 4/5] bpf, x86: Add jit support for private stack
Date: Thu, 26 Sep 2024 16:45:26 -0700
Message-ID: <20240926234526.1770736-1-yonghong.song@linux.dev>
In-Reply-To: <20240926234506.1769256-1-yonghong.song@linux.dev>
References: <20240926234506.1769256-1-yonghong.song@linux.dev>
Add jit support for private stack. For a particular subtree, e.g.,

    subtree_root    <== stack depth 120
      subprog1      <== stack depth 80
        subprog2    <== stack depth 40
      subprog3      <== stack depth 160

let private_stack_ptr be the memory address allocated for the private
stack. The frame pointer for each prog above is calculated like below:

    subtree_root    <== subtree_root_fp     = private_stack_ptr + 120
      subprog1      <== subtree_subprog1_fp = subtree_root_fp + 80
        subprog2    <== subtree_subprog2_fp = subtree_subprog1_fp + 40
      subprog3      <== subtree_subprog3_fp = subtree_root_fp + 160

For any function call to a helper/kfunc, a push/pop of the prog frame
pointer is needed in order to preserve the frame pointer value. To deal
with exception handling, a push/pop of the frame pointer is also emitted
around calls to subsequent subprogs. For example, for

    subtree_root
      subprog1
        ...
        insn: call bpf_throw
        ...

after jit, we will have

    subtree_root
      insn: push r9
      subprog1
        ...
        insn: push r9
        insn: call bpf_throw
        insn: pop r9
        ...
      insn: pop r9
    exception_handler
      pop r9
      ...

where r9 represents the frame pointer for each subprog.
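A small user-space model of the frame-pointer arithmetic above
(illustrative only; in the jit the base comes from a movabs of the
per-cpu cookie plus a run-time add of gs:[&this_cpu_off], see
emit_private_frame_ptr() below):

    #include <stdio.h>

    int main(void)
    {
            unsigned long private_stack_ptr = 0x1000; /* pretend per-cpu base */
            unsigned long root_fp, fp1, fp2, fp3;

            /* The root's fp is the base plus its own depth; each callee's
             * fp is the caller's fp plus the callee's depth, so frames in
             * one call chain never overlap.
             */
            root_fp = private_stack_ptr + 120; /* subtree_root, depth 120 */
            fp1 = root_fp + 80;                /* subprog1, depth 80 */
            fp2 = fp1 + 40;                    /* subprog2, depth 40 */
            fp3 = root_fp + 160;               /* subprog3, depth 160 */

            printf("root=%#lx sub1=%#lx sub2=%#lx sub3=%#lx\n",
                   root_fp, fp1, fp2, fp3);
            return 0;
    }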
Signed-off-by: Yonghong Song
---
 arch/x86/net/bpf_jit_comp.c | 87 ++++++++++++++++++++++++++++++++++---
 1 file changed, 81 insertions(+), 6 deletions(-)

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 06b080b61aa5..c264822c926b 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -325,6 +325,22 @@ struct jit_context {
 /* Number of bytes that will be skipped on tailcall */
 #define X86_TAIL_CALL_OFFSET	(12 + ENDBR_INSN_SIZE)
 
+static void push_r9(u8 **pprog)
+{
+	u8 *prog = *pprog;
+
+	EMIT2(0x41, 0x51);   /* push r9 */
+	*pprog = prog;
+}
+
+static void pop_r9(u8 **pprog)
+{
+	u8 *prog = *pprog;
+
+	EMIT2(0x41, 0x59);   /* pop r9 */
+	*pprog = prog;
+}
+
 static void push_r12(u8 **pprog)
 {
 	u8 *prog = *pprog;
@@ -491,7 +507,7 @@ static void emit_prologue_tail_call(u8 **pprog, bool is_subprog)
  */
 static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf,
 			  bool tail_call_reachable, bool is_subprog,
-			  bool is_exception_cb)
+			  bool is_exception_cb, enum bpf_pstack_state pstack)
 {
 	u8 *prog = *pprog;
@@ -518,6 +534,8 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf,
 		 * first restore those callee-saved regs from stack, before
 		 * reusing the stack frame.
 		 */
+		if (pstack)
+			pop_r9(&prog);
 		pop_callee_regs(&prog, all_callee_regs_used);
 		pop_r12(&prog);
 		/* Reset the stack frame. */
@@ -1404,6 +1422,22 @@ static void emit_shiftx(u8 **pprog, u32 dst_reg, u8 src_reg, bool is64, u8 op)
 	*pprog = prog;
 }
 
+static void emit_private_frame_ptr(u8 **pprog, void *private_frame_ptr)
+{
+	u8 *prog = *pprog;
+
+	/* movabs r9, private_frame_ptr */
+	emit_mov_imm64(&prog, X86_REG_R9, (long) private_frame_ptr >> 32,
+		       (u32) (long) private_frame_ptr);
+
+	/* add r9, gs:[&this_cpu_off] */
+	EMIT2(0x65, 0x4c);
+	EMIT3(0x03, 0x0c, 0x25);
+	EMIT((u32)(unsigned long)&this_cpu_off, 4);
+
+	*pprog = prog;
+}
+
 #define INSN_SZ_DIFF (((addrs[i] - addrs[i - 1]) - (prog - temp)))
 
 #define __LOAD_TCC_PTR(off) \
@@ -1421,20 +1455,31 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
 	int insn_cnt = bpf_prog->len;
 	bool seen_exit = false;
 	u8 temp[BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY];
+	void __percpu *private_frame_ptr = NULL;
 	u64 arena_vm_start, user_vm_start;
+	u32 orig_stack_depth, stack_depth;
 	int i, excnt = 0;
 	int ilen, proglen = 0;
 	u8 *prog = temp;
 	int err;
 
+	stack_depth = bpf_prog->aux->stack_depth;
+	orig_stack_depth = round_up(stack_depth, 8);
+	if (bpf_prog->pstack) {
+		stack_depth = 0;
+		if (bpf_prog->pstack == PSTACK_TREE_ROOT)
+			private_frame_ptr = bpf_prog->private_stack_ptr + orig_stack_depth;
+	}
+
 	arena_vm_start = bpf_arena_get_kern_vm_start(bpf_prog->aux->arena);
 	user_vm_start = bpf_arena_get_user_vm_start(bpf_prog->aux->arena);
 
 	detect_reg_usage(insn, insn_cnt, callee_regs_used);
 
-	emit_prologue(&prog, bpf_prog->aux->stack_depth,
+	emit_prologue(&prog, stack_depth,
 		      bpf_prog_was_classic(bpf_prog), tail_call_reachable,
-		      bpf_is_subprog(bpf_prog), bpf_prog->aux->exception_cb);
+		      bpf_is_subprog(bpf_prog), bpf_prog->aux->exception_cb,
+		      bpf_prog->pstack);
 	/* Exception callback will clobber callee regs for its own use, and
 	 * restore the original callee regs from main prog's stack frame.
	 */
@@ -1454,6 +1499,17 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
 		emit_mov_imm64(&prog, X86_REG_R12,
 			       arena_vm_start >> 32, (u32) arena_vm_start);
 
+	if (bpf_prog->pstack == PSTACK_TREE_ROOT) {
+		emit_private_frame_ptr(&prog, private_frame_ptr);
+	} else if (bpf_prog->pstack == PSTACK_TREE_INTERNAL && orig_stack_depth) {
+		/* r9 += orig_stack_depth */
+		maybe_emit_1mod(&prog, X86_REG_R9, true);
+		if (is_imm8(orig_stack_depth))
+			EMIT3(0x83, add_1reg(0xC0, X86_REG_R9), orig_stack_depth);
+		else
+			EMIT2_off32(0x81, add_1reg(0xC0, X86_REG_R9), orig_stack_depth);
+	}
+
 	ilen = prog - temp;
 	if (rw_image)
 		memcpy(rw_image + proglen, temp, ilen);
@@ -1473,6 +1529,14 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
 		u8 *func;
 		int nops;
 
+		if (bpf_prog->pstack) {
+			if (src_reg == BPF_REG_FP)
+				src_reg = X86_REG_R9;
+
+			if (dst_reg == BPF_REG_FP)
+				dst_reg = X86_REG_R9;
+		}
+
 		switch (insn->code) {
 			/* ALU */
 		case BPF_ALU | BPF_ADD | BPF_X:
@@ -2128,14 +2192,20 @@ st:			if (is_imm8(insn->off))
 			func = (u8 *) __bpf_call_base + imm32;
 			if (tail_call_reachable) {
-				LOAD_TAIL_CALL_CNT_PTR(bpf_prog->aux->stack_depth);
+				LOAD_TAIL_CALL_CNT_PTR(stack_depth);
 				ip += 7;
 			}
 			if (!imm32)
 				return -EINVAL;
+			if (bpf_prog->pstack) {
+				push_r9(&prog);
+				ip += 2;
+			}
 			ip += x86_call_depth_emit_accounting(&prog, func, ip);
 			if (emit_call(&prog, func, ip))
 				return -EINVAL;
+			if (bpf_prog->pstack)
+				pop_r9(&prog);
 			break;
 		}
@@ -2145,13 +2215,13 @@ st:			if (is_imm8(insn->off))
 							 &bpf_prog->aux->poke_tab[imm32 - 1],
 							 &prog, image + addrs[i - 1],
 							 callee_regs_used,
-							 bpf_prog->aux->stack_depth,
+							 stack_depth,
 							 ctx);
 			else
 				emit_bpf_tail_call_indirect(bpf_prog,
 							    &prog,
 							    callee_regs_used,
-							    bpf_prog->aux->stack_depth,
+							    stack_depth,
 							    image + addrs[i - 1],
 							    ctx);
 			break;
@@ -3559,6 +3629,11 @@ bool bpf_jit_supports_exceptions(void)
 	return IS_ENABLED(CONFIG_UNWINDER_ORC);
 }
 
+bool bpf_jit_supports_private_stack(void)
+{
+	return true;
+}
+
 void arch_bpf_stack_walk(bool (*consume_fn)(void *cookie, u64 ip, u64 sp, u64 bp), void *cookie)
 {
 #if defined(CONFIG_UNWINDER_ORC)

From patchwork Thu Sep 26 23:45:31 2024
X-Patchwork-Submitter: Yonghong Song
X-Patchwork-Id: 13813778
X-Patchwork-Delegate: bpf@iogearbox.net
From: Yonghong Song
To: bpf@vger.kernel.org
Cc: Alexei Starovoitov, Andrii Nakryiko, Daniel Borkmann,
    kernel-team@fb.com, Martin KaFai Lau
Subject: [PATCH bpf-next v3 5/5] selftests/bpf: Add private stack tests
Date: Thu, 26 Sep 2024 16:45:31 -0700
Message-ID: <20240926234531.1771024-1-yonghong.song@linux.dev>
In-Reply-To: <20240926234506.1769256-1-yonghong.song@linux.dev>
References: <20240926234506.1769256-1-yonghong.song@linux.dev>

Add some private stack tests:
  - a prog with its stack size greater than BPF_PSTACK_MIN_SUBTREE_SIZE,
  - a prog with its stack size less than BPF_PSTACK_MIN_SUBTREE_SIZE,
  - a prog with one subprog having MAX_BPF_STACK stack size and another
    subprog having a non-zero stack size,
  - a prog with a callback function, and
  - progs with an exception thrown in the main prog or in a subprog.
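For reference, these should run under the usual selftests workflow
(assuming an x86-64 build; on other architectures only the dummy test
below is compiled in):

    $ cd tools/testing/selftests/bpf
    $ make
    $ sudo ./test_progs -t verifier_private_stack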
Signed-off-by: Yonghong Song
---
 .../selftests/bpf/prog_tests/verifier.c       |   2 +
 .../bpf/progs/verifier_private_stack.c        | 215 ++++++++++++++++++
 2 files changed, 217 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/progs/verifier_private_stack.c

diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c
index e26b5150fc43..635ff3509403 100644
--- a/tools/testing/selftests/bpf/prog_tests/verifier.c
+++ b/tools/testing/selftests/bpf/prog_tests/verifier.c
@@ -59,6 +59,7 @@
 #include "verifier_or_jmp32_k.skel.h"
 #include "verifier_precision.skel.h"
 #include "verifier_prevent_map_lookup.skel.h"
+#include "verifier_private_stack.skel.h"
 #include "verifier_raw_stack.skel.h"
 #include "verifier_raw_tp_writable.skel.h"
 #include "verifier_reg_equal.skel.h"
@@ -185,6 +186,7 @@ void test_verifier_bpf_fastcall(void) { RUN(verifier_bpf_fastcall); }
 void test_verifier_or_jmp32_k(void) { RUN(verifier_or_jmp32_k); }
 void test_verifier_precision(void) { RUN(verifier_precision); }
 void test_verifier_prevent_map_lookup(void) { RUN(verifier_prevent_map_lookup); }
+void test_verifier_private_stack(void) { RUN(verifier_private_stack); }
 void test_verifier_raw_stack(void) { RUN(verifier_raw_stack); }
 void test_verifier_raw_tp_writable(void) { RUN(verifier_raw_tp_writable); }
 void test_verifier_reg_equal(void) { RUN(verifier_reg_equal); }
diff --git a/tools/testing/selftests/bpf/progs/verifier_private_stack.c b/tools/testing/selftests/bpf/progs/verifier_private_stack.c
new file mode 100644
index 000000000000..badd1fd1e3dd
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_private_stack.c
@@ -0,0 +1,215 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+/* From include/linux/filter.h */
+#define MAX_BPF_STACK	512
+
+#if defined(__TARGET_ARCH_x86)
+
+SEC("kprobe")
+__description("Private stack, single prog")
+__success
+__arch_x86_64
+__jited("	movabsq	$0x{{.*}}, %r9")
+__jited("	addq	%gs:0x{{.*}}, %r9")
+__jited("	movl	$0x2a, %edi")
+__jited("	movq	%rdi, -0x100(%r9)")
+__naked void private_stack_single_prog(void)
+{
+	asm volatile (
+	"r1 = 42;"
256) = r1;" + "r0 = 0;" + "exit;" + : + : + : __clobber_all); +} + +__used +__naked static void cumulative_stack_depth_subprog(void) +{ + asm volatile ( + "r1 = 41;" + "*(u64 *)(r10 - 32) = r1;" + "call %[bpf_get_smp_processor_id];" + "exit;" + :: __imm(bpf_get_smp_processor_id) + : __clobber_all); +} + +SEC("kprobe") +__description("Private stack, subtree > MAX_BPF_STACK") +__success +__arch_x86_64 +/* private stack fp for the main prog */ +__jited(" movabsq $0x{{.*}}, %r9") +__jited(" addq %gs:0x{{.*}}, %r9") +__jited(" movl $0x2a, %edi") +__jited(" movq %rdi, -0x200(%r9)") +__jited(" pushq %r9") +__jited(" callq 0x{{.*}}") +__jited(" popq %r9") +__jited(" xorl %eax, %eax") +__naked void private_stack_nested_1(void) +{ + asm volatile ( + "r1 = 42;" + "*(u64 *)(r10 - %[max_bpf_stack]) = r1;" + "call cumulative_stack_depth_subprog;" + "r0 = 0;" + "exit;" + : + : __imm_const(max_bpf_stack, MAX_BPF_STACK) + : __clobber_all); +} + +SEC("kprobe") +__description("Private stack, subtree > MAX_BPF_STACK") +__success +__arch_x86_64 +/* private stack fp for the subprog */ +__jited(" addq $0x20, %r9") +__naked void private_stack_nested_2(void) +{ + asm volatile ( + "r1 = 42;" + "*(u64 *)(r10 - %[max_bpf_stack]) = r1;" + "call cumulative_stack_depth_subprog;" + "r0 = 0;" + "exit;" + : + : __imm_const(max_bpf_stack, MAX_BPF_STACK) + : __clobber_all); +} + +SEC("raw_tp") +__description("No private stack, nested") +__success +__arch_x86_64 +__jited(" subq $0x8, %rsp") +__naked void no_private_stack_nested(void) +{ + asm volatile ( + "r1 = 42;" + "*(u64 *)(r10 - 8) = r1;" + "call cumulative_stack_depth_subprog;" + "r0 = 0;" + "exit;" + : + : + : __clobber_all); +} + +__naked __noinline __used +static unsigned long loop_callback() +{ + asm volatile ( + "call %[bpf_get_prandom_u32];" + "r1 = 42;" + "*(u64 *)(r10 - 512) = r1;" + "call cumulative_stack_depth_subprog;" + "r0 = 0;" + "exit;" + : + : __imm(bpf_get_prandom_u32) + : __clobber_common); +} + +SEC("raw_tp") +__description("Private stack, callback") +__success +__arch_x86_64 +/* for func loop_callback */ +__jited("func #1") +__jited(" endbr64") +__jited(" nopl (%rax,%rax)") +__jited(" nopl (%rax)") +__jited(" pushq %rbp") +__jited(" movq %rsp, %rbp") +__jited(" endbr64") +__jited(" movabsq $0x{{.*}}, %r9") +__jited(" addq %gs:0x{{.*}}, %r9") +__jited(" pushq %r9") +__jited(" callq") +__jited(" popq %r9") +__jited(" movl $0x2a, %edi") +__jited(" movq %rdi, -0x200(%r9)") +__jited(" pushq %r9") +__jited(" callq") +__jited(" popq %r9") +__naked void private_stack_callback(void) +{ + asm volatile ( + "r1 = 1;" + "r2 = %[loop_callback];" + "r3 = 0;" + "r4 = 0;" + "call %[bpf_loop];" + "r0 = 0;" + "exit;" + : + : __imm_ptr(loop_callback), + __imm(bpf_loop) + : __clobber_common); +} + +SEC("fentry/bpf_fentry_test9") +__description("Private stack, exception in main prog") +__success __retval(0) +__arch_x86_64 +__jited(" pushq %r9") +__jited(" callq") +__jited(" popq %r9") +int private_stack_exception_main_prog(void) +{ + asm volatile ( + "r1 = 42;" + "*(u64 *)(r10 - 512) = r1;" + ::: __clobber_common); + + bpf_throw(0); + return 0; +} + +__used static int subprog_exception(void) +{ + bpf_throw(0); + return 0; +} + +SEC("fentry/bpf_fentry_test9") +__description("Private stack, exception in subprog") +__success __retval(0) +__arch_x86_64 +__jited(" movq %rdi, -0x200(%r9)") +__jited(" pushq %r9") +__jited(" callq") +__jited(" popq %r9") +int private_stack_exception_sub_prog(void) +{ + asm volatile ( + "r1 = 42;" + "*(u64 *)(r10 - 512) = r1;" + "call 
subprog_exception;" + ::: __clobber_common); + + return 0; +} + +#else + +SEC("kprobe") +__description("private stack is not supported, use a dummy test") +__success +int dummy_test(void) +{ + return 0; +} + +#endif + +char _license[] SEC("license") = "GPL";