diff mbox series

[bpf-next,v6,4/9] bpf: Mark each subprog with proper private stack modes

Message ID 20241020191405.2106256-1-yonghong.song@linux.dev (mailing list archive)
State Changes Requested
Delegated to: BPF
Headers show
Series bpf: Support private stack for bpf progs | expand

Checks

Context Check Description
bpf/vmtest-bpf-next-PR success PR summary
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 206 this patch: 206
netdev/build_tools success Errors and warnings before: 0 (+1) this patch: 0 (+1)
netdev/cc_maintainers warning 8 maintainers not CCed: song@kernel.org haoluo@google.com john.fastabend@gmail.com sdf@fomichev.me martin.lau@linux.dev kpsingh@kernel.org eddyz87@gmail.com jolsa@kernel.org
netdev/build_clang success Errors and warnings before: 257 this patch: 257
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 6962 this patch: 6962
netdev/checkpatch warning WARNING: line length of 81 exceeds 80 columns WARNING: line length of 84 exceeds 80 columns WARNING: line length of 85 exceeds 80 columns WARNING: line length of 86 exceeds 80 columns WARNING: line length of 91 exceeds 80 columns WARNING: line length of 92 exceeds 80 columns WARNING: line length of 95 exceeds 80 columns WARNING: line length of 96 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 6 this patch: 6
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next-VM_Test-2 success Logs for Unittests
bpf/vmtest-bpf-next-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-next-VM_Test-5 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-3 success Logs for Validate matrix.py
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-10 success Logs for aarch64-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-12 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-4 success Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-9 success Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-6 success Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-8 success Logs for aarch64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-7 success Logs for aarch64-gcc / test (test_progs, false, 360) / test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-11 success Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-16 success Logs for s390x-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-17 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-18 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-19 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-27 success Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-33 success Logs for x86_64-llvm-17 / veristat
bpf/vmtest-bpf-next-VM_Test-15 success Logs for s390x-gcc / test (test_verifier, false, 360) / test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-28 success Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17-O2
bpf/vmtest-bpf-next-VM_Test-34 success Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-35 success Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18-O2
bpf/vmtest-bpf-next-VM_Test-41 success Logs for x86_64-llvm-18 / veristat
bpf/vmtest-bpf-next-VM_Test-14 success Logs for s390x-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-13 success Logs for s390x-gcc / test (test_progs, false, 360) / test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-20 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-21 success Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-22 success Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-23 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-24 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-25 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-26 success Logs for x86_64-gcc / veristat / veristat on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-29 success Logs for x86_64-llvm-17 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-30 success Logs for x86_64-llvm-17 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-31 success Logs for x86_64-llvm-17 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-32 success Logs for x86_64-llvm-17 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-36 success Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-40 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-37 success Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-38 success Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-39 success Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18

Commit Message

Yonghong Song Oct. 20, 2024, 7:14 p.m. UTC
Three private stack modes are used to direct jit action:
  NO_PRIV_STACK:        do not use private stack
  PRIV_STACK_SUB_PROG:  adjust frame pointer address (similar to normal stack)
  PRIV_STACK_ROOT_PROG: set the frame pointer

Note that for a subtree root prog (main prog or callback fn),
PRIV_STACK_ROOT_PROG mode is still used even if the bpf_prog stack size
is 0. This is needed for bpf exception handling. More details can be
found in the subsequent jit support and selftest patches.

Signed-off-by: Yonghong Song <yonghong.song@linux.dev>
---
 include/linux/bpf.h   |  9 +++++++++
 kernel/bpf/core.c     | 19 +++++++++++++++++++
 kernel/bpf/verifier.c | 29 +++++++++++++++++++++++++++++
 3 files changed, 57 insertions(+)

Comments

Jiri Olsa Oct. 20, 2024, 10:01 p.m. UTC | #1
On Sun, Oct 20, 2024 at 12:14:05PM -0700, Yonghong Song wrote:
> Three private stack modes are used to direct jit action:
>   NO_PRIV_STACK:        do not use private stack
>   PRIV_STACK_SUB_PROG:  adjust frame pointer address (similar to normal stack)
>   PRIV_STACK_ROOT_PROG: set the frame pointer
> 
> Note that for subtree root prog (main prog or callback fn), even if the
> bpf_prog stack size is 0, PRIV_STACK_ROOT_PROG mode is still used.
> This is for bpf exception handling. More details can be found in
> subsequent jit support and selftest patches.
> 
> Signed-off-by: Yonghong Song <yonghong.song@linux.dev>
> ---
>  include/linux/bpf.h   |  9 +++++++++
>  kernel/bpf/core.c     | 19 +++++++++++++++++++
>  kernel/bpf/verifier.c | 29 +++++++++++++++++++++++++++++
>  3 files changed, 57 insertions(+)
> 
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index 376e43fc72b9..27430e9dcfe3 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -1456,6 +1456,12 @@ struct btf_mod_pair {
>  
>  struct bpf_kfunc_desc_tab;
>  
> +enum bpf_priv_stack_mode {
> +	NO_PRIV_STACK,
> +	PRIV_STACK_SUB_PROG,
> +	PRIV_STACK_ROOT_PROG,
> +};
> +
>  struct bpf_prog_aux {
>  	atomic64_t refcnt;
>  	u32 used_map_cnt;
> @@ -1472,6 +1478,9 @@ struct bpf_prog_aux {
>  	u32 ctx_arg_info_size;
>  	u32 max_rdonly_access;
>  	u32 max_rdwr_access;
> +	enum bpf_priv_stack_mode priv_stack_mode;
> +	u16 subtree_stack_depth; /* Subtree stack depth if PRIV_STACK_ROOT_PROG, 0 otherwise */
> +	void __percpu *priv_stack_ptr;
>  	struct btf *attach_btf;
>  	const struct bpf_ctx_arg_aux *ctx_arg_info;
>  	struct mutex dst_mutex; /* protects dst_* pointers below, *after* prog becomes visible */
> diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
> index 14d9288441f2..aee0055def4f 100644
> --- a/kernel/bpf/core.c
> +++ b/kernel/bpf/core.c
> @@ -1240,6 +1240,7 @@ void __weak bpf_jit_free(struct bpf_prog *fp)
>  		struct bpf_binary_header *hdr = bpf_jit_binary_hdr(fp);
>  
>  		bpf_jit_binary_free(hdr);
> +		free_percpu(fp->aux->priv_stack_ptr);

Shouldn't this also be added to the x86 version of bpf_jit_free()?

jirka

>  		WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(fp));
>  	}
>  
> @@ -2421,6 +2422,24 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
>  		if (*err)
>  			return fp;
>  
> +		if (fp->aux->priv_stack_eligible) {
> +			if (!fp->aux->stack_depth) {
> +				fp->aux->priv_stack_mode = NO_PRIV_STACK;
> +			} else {
> +				void __percpu *priv_stack_ptr;
> +
> +				fp->aux->priv_stack_mode = PRIV_STACK_ROOT_PROG;
> +				priv_stack_ptr =
> +					__alloc_percpu_gfp(fp->aux->stack_depth, 8, GFP_KERNEL);
> +				if (!priv_stack_ptr) {
> +					*err = -ENOMEM;
> +					return fp;
> +				}
> +				fp->aux->subtree_stack_depth = fp->aux->stack_depth;
> +				fp->aux->priv_stack_ptr = priv_stack_ptr;
> +			}
> +		}
> +
>  		fp = bpf_int_jit_compile(fp);
>  		bpf_prog_jit_attempt_done(fp);
>  		if (!fp->jited && jit_needed) {
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index 25283ee6f86f..f770015d6ad1 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -20018,6 +20018,8 @@ static int jit_subprogs(struct bpf_verifier_env *env)
>  {
>  	struct bpf_prog *prog = env->prog, **func, *tmp;
>  	int i, j, subprog_start, subprog_end = 0, len, subprog;
> +	int subtree_top_idx, subtree_stack_depth;
> +	void __percpu *priv_stack_ptr;
>  	struct bpf_map *map_ptr;
>  	struct bpf_insn *insn;
>  	void *old_bpf_func;
> @@ -20096,6 +20098,33 @@ static int jit_subprogs(struct bpf_verifier_env *env)
>  		func[i]->is_func = 1;
>  		func[i]->sleepable = prog->sleepable;
>  		func[i]->aux->func_idx = i;
> +
> +		subtree_top_idx = env->subprog_info[i].subtree_top_idx;
> +		if (env->subprog_info[subtree_top_idx].priv_stack_eligible) {
> +			if (subtree_top_idx == i)
> +				func[i]->aux->subtree_stack_depth =
> +					env->subprog_info[i].subtree_stack_depth;
> +
> +			subtree_stack_depth = func[i]->aux->subtree_stack_depth;
> +			if (subtree_top_idx != i) {
> +				if (env->subprog_info[subtree_top_idx].subtree_stack_depth)
> +					func[i]->aux->priv_stack_mode = PRIV_STACK_SUB_PROG;
> +				else
> +					func[i]->aux->priv_stack_mode = NO_PRIV_STACK;
> +			} else if (!subtree_stack_depth) {
> +				func[i]->aux->priv_stack_mode = PRIV_STACK_ROOT_PROG;
> +			} else {
> +				func[i]->aux->priv_stack_mode = PRIV_STACK_ROOT_PROG;
> +				priv_stack_ptr =
> +					__alloc_percpu_gfp(subtree_stack_depth, 8, GFP_KERNEL);
> +				if (!priv_stack_ptr) {
> +					err = -ENOMEM;
> +					goto out_free;
> +				}
> +				func[i]->aux->priv_stack_ptr = priv_stack_ptr;
> +			}
> +		}
> +
>  		/* Below members will be freed only at prog->aux */
>  		func[i]->aux->btf = prog->aux->btf;
>  		func[i]->aux->func_info = prog->aux->func_info;
> -- 
> 2.43.5
> 
>
Yonghong Song Oct. 21, 2024, 4:22 a.m. UTC | #2
On 10/20/24 3:01 PM, Jiri Olsa wrote:
> On Sun, Oct 20, 2024 at 12:14:05PM -0700, Yonghong Song wrote:
>> Three private stack modes are used to direct jit action:
>>    NO_PRIV_STACK:        do not use private stack
>>    PRIV_STACK_SUB_PROG:  adjust frame pointer address (similar to normal stack)
>>    PRIV_STACK_ROOT_PROG: set the frame pointer
>>
>> Note that for subtree root prog (main prog or callback fn), even if the
>> bpf_prog stack size is 0, PRIV_STACK_ROOT_PROG mode is still used.
>> This is for bpf exception handling. More details can be found in
>> subsequent jit support and selftest patches.
>>
>> Signed-off-by: Yonghong Song <yonghong.song@linux.dev>
>> ---
>>   include/linux/bpf.h   |  9 +++++++++
>>   kernel/bpf/core.c     | 19 +++++++++++++++++++
>>   kernel/bpf/verifier.c | 29 +++++++++++++++++++++++++++++
>>   3 files changed, 57 insertions(+)
>>
>> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
>> index 376e43fc72b9..27430e9dcfe3 100644
>> --- a/include/linux/bpf.h
>> +++ b/include/linux/bpf.h
>> @@ -1456,6 +1456,12 @@ struct btf_mod_pair {
>>   
>>   struct bpf_kfunc_desc_tab;
>>   
>> +enum bpf_priv_stack_mode {
>> +	NO_PRIV_STACK,
>> +	PRIV_STACK_SUB_PROG,
>> +	PRIV_STACK_ROOT_PROG,
>> +};
>> +
>>   struct bpf_prog_aux {
>>   	atomic64_t refcnt;
>>   	u32 used_map_cnt;
>> @@ -1472,6 +1478,9 @@ struct bpf_prog_aux {
>>   	u32 ctx_arg_info_size;
>>   	u32 max_rdonly_access;
>>   	u32 max_rdwr_access;
>> +	enum bpf_priv_stack_mode priv_stack_mode;
>> +	u16 subtree_stack_depth; /* Subtree stack depth if PRIV_STACK_ROOT_PROG, 0 otherwise */
>> +	void __percpu *priv_stack_ptr;
>>   	struct btf *attach_btf;
>>   	const struct bpf_ctx_arg_aux *ctx_arg_info;
>>   	struct mutex dst_mutex; /* protects dst_* pointers below, *after* prog becomes visible */
>> diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
>> index 14d9288441f2..aee0055def4f 100644
>> --- a/kernel/bpf/core.c
>> +++ b/kernel/bpf/core.c
>> @@ -1240,6 +1240,7 @@ void __weak bpf_jit_free(struct bpf_prog *fp)
>>   		struct bpf_binary_header *hdr = bpf_jit_binary_hdr(fp);
>>   
>>   		bpf_jit_binary_free(hdr);
>> +		free_percpu(fp->aux->priv_stack_ptr);
> Shouldn't this also be added to the x86 version of bpf_jit_free()?

Thanks for spotting this! Indeed, the free_percpu() call also needs to be
added to the x86 version of bpf_jit_free(). Will fix in the next revision.

>
> jirka
>
>>   		WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(fp));
>>   	}

[...]
diff mbox series

Patch

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 376e43fc72b9..27430e9dcfe3 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1456,6 +1456,12 @@  struct btf_mod_pair {
 
 struct bpf_kfunc_desc_tab;
 
+enum bpf_priv_stack_mode {
+	NO_PRIV_STACK,
+	PRIV_STACK_SUB_PROG,
+	PRIV_STACK_ROOT_PROG,
+};
+
 struct bpf_prog_aux {
 	atomic64_t refcnt;
 	u32 used_map_cnt;
@@ -1472,6 +1478,9 @@  struct bpf_prog_aux {
 	u32 ctx_arg_info_size;
 	u32 max_rdonly_access;
 	u32 max_rdwr_access;
+	enum bpf_priv_stack_mode priv_stack_mode;
+	u16 subtree_stack_depth; /* Subtree stack depth if PRIV_STACK_ROOT_PROG, 0 otherwise */
+	void __percpu *priv_stack_ptr;
 	struct btf *attach_btf;
 	const struct bpf_ctx_arg_aux *ctx_arg_info;
 	struct mutex dst_mutex; /* protects dst_* pointers below, *after* prog becomes visible */
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 14d9288441f2..aee0055def4f 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1240,6 +1240,7 @@  void __weak bpf_jit_free(struct bpf_prog *fp)
 		struct bpf_binary_header *hdr = bpf_jit_binary_hdr(fp);
 
 		bpf_jit_binary_free(hdr);
+		free_percpu(fp->aux->priv_stack_ptr);
 		WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(fp));
 	}
 
@@ -2421,6 +2422,24 @@  struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
 		if (*err)
 			return fp;
 
+		if (fp->aux->priv_stack_eligible) {
+			if (!fp->aux->stack_depth) {
+				fp->aux->priv_stack_mode = NO_PRIV_STACK;
+			} else {
+				void __percpu *priv_stack_ptr;
+
+				fp->aux->priv_stack_mode = PRIV_STACK_ROOT_PROG;
+				priv_stack_ptr =
+					__alloc_percpu_gfp(fp->aux->stack_depth, 8, GFP_KERNEL);
+				if (!priv_stack_ptr) {
+					*err = -ENOMEM;
+					return fp;
+				}
+				fp->aux->subtree_stack_depth = fp->aux->stack_depth;
+				fp->aux->priv_stack_ptr = priv_stack_ptr;
+			}
+		}
+
 		fp = bpf_int_jit_compile(fp);
 		bpf_prog_jit_attempt_done(fp);
 		if (!fp->jited && jit_needed) {
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 25283ee6f86f..f770015d6ad1 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -20018,6 +20018,8 @@  static int jit_subprogs(struct bpf_verifier_env *env)
 {
 	struct bpf_prog *prog = env->prog, **func, *tmp;
 	int i, j, subprog_start, subprog_end = 0, len, subprog;
+	int subtree_top_idx, subtree_stack_depth;
+	void __percpu *priv_stack_ptr;
 	struct bpf_map *map_ptr;
 	struct bpf_insn *insn;
 	void *old_bpf_func;
@@ -20096,6 +20098,33 @@  static int jit_subprogs(struct bpf_verifier_env *env)
 		func[i]->is_func = 1;
 		func[i]->sleepable = prog->sleepable;
 		func[i]->aux->func_idx = i;
+
+		subtree_top_idx = env->subprog_info[i].subtree_top_idx;
+		if (env->subprog_info[subtree_top_idx].priv_stack_eligible) {
+			if (subtree_top_idx == i)
+				func[i]->aux->subtree_stack_depth =
+					env->subprog_info[i].subtree_stack_depth;
+
+			subtree_stack_depth = func[i]->aux->subtree_stack_depth;
+			if (subtree_top_idx != i) {
+				if (env->subprog_info[subtree_top_idx].subtree_stack_depth)
+					func[i]->aux->priv_stack_mode = PRIV_STACK_SUB_PROG;
+				else
+					func[i]->aux->priv_stack_mode = NO_PRIV_STACK;
+			} else if (!subtree_stack_depth) {
+				func[i]->aux->priv_stack_mode = PRIV_STACK_ROOT_PROG;
+			} else {
+				func[i]->aux->priv_stack_mode = PRIV_STACK_ROOT_PROG;
+				priv_stack_ptr =
+					__alloc_percpu_gfp(subtree_stack_depth, 8, GFP_KERNEL);
+				if (!priv_stack_ptr) {
+					err = -ENOMEM;
+					goto out_free;
+				}
+				func[i]->aux->priv_stack_ptr = priv_stack_ptr;
+			}
+		}
+
 		/* Below members will be freed only at prog->aux */
 		func[i]->aux->btf = prog->aux->btf;
 		func[i]->aux->func_info = prog->aux->func_info;