
[v2,bpf-next] bpf: move sleepable flag from bpf_prog_aux to bpf_prog

Message ID 20240309004739.2961431-1-andrii@kernel.org (mailing list archive)
State Accepted
Commit 66c8473135c62f478301a0e5b3012f203562dfa6
Delegated to: BPF
Series [v2,bpf-next] bpf: move sleepable flag from bpf_prog_aux to bpf_prog

Checks

Context Check Description
bpf/vmtest-bpf-next-VM_Test-45 fail Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-43 fail Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-46 success Logs for x86_64-llvm-18 / veristat
bpf/vmtest-bpf-next-VM_Test-44 fail Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18
bpf/vmtest-bpf-next-PR success PR summary
netdev/series_format success Single patches do not need cover letters
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 7577 this patch: 7577
netdev/build_tools success Errors and warnings before: 0 this patch: 0
netdev/cc_maintainers warning 26 maintainers not CCed: adrian.hunter@intel.com mhiramat@kernel.org netdev@vger.kernel.org mark.rutland@arm.com alexander.shishkin@linux.intel.com linux-perf-users@vger.kernel.org mathieu.desnoyers@efficios.com kuba@kernel.org edumazet@google.com acme@kernel.org namhyung@kernel.org jolsa@kernel.org haoluo@google.com john.fastabend@gmail.com song@kernel.org pabeni@redhat.com mingo@redhat.com eddyz87@gmail.com yonghong.song@linux.dev sdf@google.com martin.lau@linux.dev rostedt@goodmis.org peterz@infradead.org kpsingh@kernel.org irogers@google.com linux-trace-kernel@vger.kernel.org
netdev/build_clang success Errors and warnings before: 2250 this patch: 2250
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 8070 this patch: 8070
netdev/checkpatch warning WARNING: line length of 91 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 6 this patch: 6
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next-VM_Test-16 success Logs for s390x-gcc / test (test_verifier, false, 360) / test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-14 success Logs for s390x-gcc / test (test_progs, false, 360) / test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-29 success Logs for x86_64-llvm-17 / build-release / build for x86_64 with llvm-17 and -O2 optimization
bpf/vmtest-bpf-next-VM_Test-13 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-15 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-3 success Logs for Validate matrix.py
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-0 success Logs for Lint
bpf/vmtest-bpf-next-VM_Test-2 success Logs for Unittests
bpf/vmtest-bpf-next-VM_Test-5 success Logs for aarch64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-4 success Logs for aarch64-gcc / build / build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-10 success Logs for aarch64-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-11 success Logs for s390x-gcc / build / build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-12 success Logs for s390x-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-20 success Logs for x86_64-gcc / build-release
bpf/vmtest-bpf-next-VM_Test-17 success Logs for s390x-gcc / veristat
bpf/vmtest-bpf-next-VM_Test-19 success Logs for x86_64-gcc / build / build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-18 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-28 success Logs for x86_64-llvm-17 / build / build for x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-34 success Logs for x86_64-llvm-17 / veristat
bpf/vmtest-bpf-next-VM_Test-35 success Logs for x86_64-llvm-18 / build / build for x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-42 success Logs for x86_64-llvm-18 / veristat
bpf/vmtest-bpf-next-VM_Test-9 success Logs for aarch64-gcc / test (test_verifier, false, 360) / test_verifier on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-8 success Logs for aarch64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-7 success Logs for aarch64-gcc / test (test_progs, false, 360) / test_progs on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-6 success Logs for aarch64-gcc / test (test_maps, false, 360) / test_maps on aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-41 success Logs for x86_64-llvm-18 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-30 success Logs for x86_64-llvm-17 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-33 success Logs for x86_64-llvm-17 / test (test_verifier, false, 360) / test_verifier on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-26 success Logs for x86_64-gcc / test (test_verifier, false, 360) / test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-22 success Logs for x86_64-gcc / test (test_progs, false, 360) / test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-21 success Logs for x86_64-gcc / test (test_maps, false, 360) / test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-27 success Logs for x86_64-gcc / veristat / veristat on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-23 success Logs for x86_64-gcc / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-31 success Logs for x86_64-llvm-17 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-24 success Logs for x86_64-gcc / test (test_progs_no_alu32_parallel, true, 30) / test_progs_no_alu32_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-32 success Logs for x86_64-llvm-17 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-17
bpf/vmtest-bpf-next-VM_Test-25 success Logs for x86_64-gcc / test (test_progs_parallel, true, 30) / test_progs_parallel on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-36 success Logs for x86_64-llvm-18 / build-release / build for x86_64 with llvm-18 and -O2 optimization
bpf/vmtest-bpf-next-VM_Test-40 success Logs for x86_64-llvm-18 / test (test_progs_no_alu32, false, 360) / test_progs_no_alu32 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-38 success Logs for x86_64-llvm-18 / test (test_progs, false, 360) / test_progs on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-39 success Logs for x86_64-llvm-18 / test (test_progs_cpuv4, false, 360) / test_progs_cpuv4 on x86_64 with llvm-18
bpf/vmtest-bpf-next-VM_Test-37 success Logs for x86_64-llvm-18 / test (test_maps, false, 360) / test_maps on x86_64 with llvm-18

Commit Message

Andrii Nakryiko March 9, 2024, 12:47 a.m. UTC
prog->aux->sleepable is checked very frequently as part of (some) BPF
program run hot paths. So this extra aux indirection seems wasteful and
on busy systems might cause unnecessary memory cache misses.

Let's move sleepable flag into prog itself to eliminate unnecessary
pointer dereference.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
---
 include/linux/bpf.h            |  8 ++++----
 kernel/bpf/bpf_iter.c          |  4 ++--
 kernel/bpf/core.c              |  2 +-
 kernel/bpf/syscall.c           |  6 +++---
 kernel/bpf/trampoline.c        |  4 ++--
 kernel/bpf/verifier.c          | 12 ++++++------
 kernel/events/core.c           |  2 +-
 kernel/trace/bpf_trace.c       |  2 +-
 net/bpf/bpf_dummy_struct_ops.c |  2 +-
 9 files changed, 21 insertions(+), 21 deletions(-)
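
For illustration, the effect of the change on one representative hot-path check, excerpted and simplified from the bpf_prog_run_array_uprobe() hunk in the diff below:

    /* before: two dependent loads (prog, then prog->aux) just to
     * test one bool, potentially touching an extra cache line
     */
    if (!prog->aux->sleepable)
            rcu_read_lock();

    /* after: the flag is packed into an existing bitfield in
     * struct bpf_prog itself, so the check dereferences only prog
     */
    if (!prog->sleepable)
            rcu_read_lock();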

Comments

Jiri Olsa March 11, 2024, 8:19 a.m. UTC | #1
On Fri, Mar 08, 2024 at 04:47:39PM -0800, Andrii Nakryiko wrote:
> prog->aux->sleepable is checked very frequently as part of (some) BPF
> program run hot paths. So this extra aux indirection seems wasteful and
> on busy systems might cause unnecessary memory cache misses.
> 
> Let's move sleepable flag into prog itself to eliminate unnecessary
> pointer dereference.
> 
> Signed-off-by: Andrii Nakryiko <andrii@kernel.org>

Curious, did it show up in some profile? LGTM regardless.

Acked-by: Jiri Olsa <jolsa@kernel.org>

jirka


> ---
>  include/linux/bpf.h            |  8 ++++----
>  kernel/bpf/bpf_iter.c          |  4 ++--
>  kernel/bpf/core.c              |  2 +-
>  kernel/bpf/syscall.c           |  6 +++---
>  kernel/bpf/trampoline.c        |  4 ++--
>  kernel/bpf/verifier.c          | 12 ++++++------
>  kernel/events/core.c           |  2 +-
>  kernel/trace/bpf_trace.c       |  2 +-
>  net/bpf/bpf_dummy_struct_ops.c |  2 +-
>  9 files changed, 21 insertions(+), 21 deletions(-)
> 
[...]
Andrii Nakryiko March 11, 2024, 5:35 p.m. UTC | #2
On Mon, Mar 11, 2024 at 1:20 AM Jiri Olsa <olsajiri@gmail.com> wrote:
>
> On Fri, Mar 08, 2024 at 04:47:39PM -0800, Andrii Nakryiko wrote:
> > prog->aux->sleepable is checked very frequently as part of (some) BPF
> > program run hot paths. So this extra aux indirection seems wasteful and
> > on busy systems might cause unnecessary memory cache misses.
> >
> > Let's move sleepable flag into prog itself to eliminate unnecessary
> > pointer dereference.
> >
> > Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
>
> Curious, did it show up in some profile? LGTM regardless.

Not really, micro-benchmarks won't show this. It's only on busy
systems, where the CPU cache is heavily utilized, that this could make
a difference. I stumbled upon this just by reading the code and
realizing that this is not just a verifier thing anymore.

>
> Acked-by: Jiri Olsa <jolsa@kernel.org>
>
> jirka
>
>
> > ---
> >  include/linux/bpf.h            |  8 ++++----
> >  kernel/bpf/bpf_iter.c          |  4 ++--
> >  kernel/bpf/core.c              |  2 +-
> >  kernel/bpf/syscall.c           |  6 +++---
> >  kernel/bpf/trampoline.c        |  4 ++--
> >  kernel/bpf/verifier.c          | 12 ++++++------
> >  kernel/events/core.c           |  2 +-
> >  kernel/trace/bpf_trace.c       |  2 +-
> >  net/bpf/bpf_dummy_struct_ops.c |  2 +-
> >  9 files changed, 21 insertions(+), 21 deletions(-)
> >

[...]
Alexei Starovoitov March 11, 2024, 11:49 p.m. UTC | #3
On Fri, Mar 8, 2024 at 4:47 PM Andrii Nakryiko <andrii@kernel.org> wrote:
>
> prog->aux->sleepable is checked very frequently as part of (some) BPF
> program run hot paths. So this extra aux indirection seems wasteful and
> on busy systems might cause unnecessary memory cache misses.
>
> Let's move sleepable flag into prog itself to eliminate unnecessary
> pointer dereference.
>
> Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
> ---
>  include/linux/bpf.h            |  8 ++++----
>  kernel/bpf/bpf_iter.c          |  4 ++--
>  kernel/bpf/core.c              |  2 +-
>  kernel/bpf/syscall.c           |  6 +++---
>  kernel/bpf/trampoline.c        |  4 ++--
>  kernel/bpf/verifier.c          | 12 ++++++------
>  kernel/events/core.c           |  2 +-
>  kernel/trace/bpf_trace.c       |  2 +-
>  net/bpf/bpf_dummy_struct_ops.c |  2 +-
>  9 files changed, 21 insertions(+), 21 deletions(-)

Looks good.
It might also help Benjamin with adding sleepable callbacks.

Separately, I noticed that jit_subprogs() doesn't copy the
sleepable flag into the subprogs' aux.
This patch doesn't change that.

Though subprogs are never called directly, it's probably
worth a follow-up to copy the flag into the subprogs?
Just for completeness.
patchwork-bot+netdevbpf@kernel.org March 12, 2024, midnight UTC | #4
Hello:

This patch was applied to bpf/bpf-next.git (master)
by Alexei Starovoitov <ast@kernel.org>:

On Fri,  8 Mar 2024 16:47:39 -0800 you wrote:
> prog->aux->sleepable is checked very frequently as part of (some) BPF
> program run hot paths. So this extra aux indirection seems wasteful and
> on busy systems might cause unnecessary memory cache misses.
> 
> Let's move sleepable flag into prog itself to eliminate unnecessary
> pointer dereference.
> 
> [...]

Here is the summary with links:
  - [v2,bpf-next] bpf: move sleepable flag from bpf_prog_aux to bpf_prog
    https://git.kernel.org/bpf/bpf-next/c/66c8473135c6

You are awesome, thank you!
Andrii Nakryiko March 12, 2024, 12:03 a.m. UTC | #5
On Mon, Mar 11, 2024 at 4:49 PM Alexei Starovoitov
<alexei.starovoitov@gmail.com> wrote:
>
> On Fri, Mar 8, 2024 at 4:47 PM Andrii Nakryiko <andrii@kernel.org> wrote:
> >
> > prog->aux->sleepable is checked very frequently as part of (some) BPF
> > program run hot paths. So this extra aux indirection seems wasteful and
> > on busy systems might cause unnecessary memory cache misses.
> >
> > Let's move sleepable flag into prog itself to eliminate unnecessary
> > pointer dereference.
> >
> > Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
> > ---
> >  include/linux/bpf.h            |  8 ++++----
> >  kernel/bpf/bpf_iter.c          |  4 ++--
> >  kernel/bpf/core.c              |  2 +-
> >  kernel/bpf/syscall.c           |  6 +++---
> >  kernel/bpf/trampoline.c        |  4 ++--
> >  kernel/bpf/verifier.c          | 12 ++++++------
> >  kernel/events/core.c           |  2 +-
> >  kernel/trace/bpf_trace.c       |  2 +-
> >  net/bpf/bpf_dummy_struct_ops.c |  2 +-
> >  9 files changed, 21 insertions(+), 21 deletions(-)
>
> Looks good.
> It might also help Benjamin with adding sleepable callbacks.
>
> Separately, I noticed that jit_subprogs() doesn't copy the
> sleepable flag into the subprogs' aux.
> This patch doesn't change that.
>
> Though subprogs are never called directly, it's probably
> worth a follow-up to copy the flag into the subprogs?
> Just for completeness.

Sure, I'll send a follow-up patch.
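
For reference, a minimal sketch of what such a follow-up could look like (hypothetical, not part of this patch; it assumes the func[]/prog naming of the subprog setup loop in jit_subprogs() in kernel/bpf/verifier.c):

    /* in jit_subprogs(), where each subprog's struct bpf_prog is set up */
    for (i = 0; i < env->subprog_cnt; i++) {
            /* ... existing subprog setup ... */
            /* propagate the main prog's sleepable state into the subprog */
            func[i]->sleepable = prog->sleepable;
    }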

Patch

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 95e07673cdc1..b444237e01a0 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1452,7 +1452,6 @@ struct bpf_prog_aux {
 	bool attach_btf_trace; /* true if attaching to BTF-enabled raw tp */
 	bool attach_tracing_prog; /* true if tracing another tracing program */
 	bool func_proto_unreliable;
-	bool sleepable;
 	bool tail_call_reachable;
 	bool xdp_has_frags;
 	bool exception_cb;
@@ -1537,7 +1536,8 @@ struct bpf_prog {
 				enforce_expected_attach_type:1, /* Enforce expected_attach_type checking at attach time */
 				call_get_stack:1, /* Do we call bpf_get_stack() or bpf_get_stackid() */
 				call_get_func_ip:1, /* Do we call get_func_ip() */
-				tstamp_type_access:1; /* Accessed __sk_buff->tstamp_type */
+				tstamp_type_access:1, /* Accessed __sk_buff->tstamp_type */
+				sleepable:1;	/* BPF program is sleepable */
 	enum bpf_prog_type	type;		/* Type of BPF program */
 	enum bpf_attach_type	expected_attach_type; /* For some prog types */
 	u32			len;		/* Number of filter blocks */
@@ -2108,14 +2108,14 @@ bpf_prog_run_array_uprobe(const struct bpf_prog_array __rcu *array_rcu,
 	old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
 	item = &array->items[0];
 	while ((prog = READ_ONCE(item->prog))) {
-		if (!prog->aux->sleepable)
+		if (!prog->sleepable)
 			rcu_read_lock();
 
 		run_ctx.bpf_cookie = item->bpf_cookie;
 		ret &= run_prog(prog, ctx);
 		item++;
 
-		if (!prog->aux->sleepable)
+		if (!prog->sleepable)
 			rcu_read_unlock();
 	}
 	bpf_reset_run_ctx(old_run_ctx);
diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
index 0fae79164187..112581cf97e7 100644
--- a/kernel/bpf/bpf_iter.c
+++ b/kernel/bpf/bpf_iter.c
@@ -548,7 +548,7 @@ int bpf_iter_link_attach(const union bpf_attr *attr, bpfptr_t uattr,
 		return -ENOENT;
 
 	/* Only allow sleepable program for resched-able iterator */
-	if (prog->aux->sleepable && !bpf_iter_target_support_resched(tinfo))
+	if (prog->sleepable && !bpf_iter_target_support_resched(tinfo))
 		return -EINVAL;
 
 	link = kzalloc(sizeof(*link), GFP_USER | __GFP_NOWARN);
@@ -697,7 +697,7 @@ int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx)
 	struct bpf_run_ctx run_ctx, *old_run_ctx;
 	int ret;
 
-	if (prog->aux->sleepable) {
+	if (prog->sleepable) {
 		rcu_read_lock_trace();
 		migrate_disable();
 		might_fault();
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 134b7979f537..c58d1781c708 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -2701,7 +2701,7 @@ void __bpf_free_used_maps(struct bpf_prog_aux *aux,
 	bool sleepable;
 	u32 i;
 
-	sleepable = aux->sleepable;
+	sleepable = aux->prog->sleepable;
 	for (i = 0; i < len; i++) {
 		map = used_maps[i];
 		if (map->ops->map_poke_untrack)
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index f63f4da4db5e..1a257c281e26 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -2212,7 +2212,7 @@ static void __bpf_prog_put_noref(struct bpf_prog *prog, bool deferred)
 		btf_put(prog->aux->attach_btf);
 
 	if (deferred) {
-		if (prog->aux->sleepable)
+		if (prog->sleepable)
 			call_rcu_tasks_trace(&prog->aux->rcu, __bpf_prog_put_rcu);
 		else
 			call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu);
@@ -2777,11 +2777,11 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size)
 	}
 
 	prog->expected_attach_type = attr->expected_attach_type;
+	prog->sleepable = !!(attr->prog_flags & BPF_F_SLEEPABLE);
 	prog->aux->attach_btf = attach_btf;
 	prog->aux->attach_btf_id = attr->attach_btf_id;
 	prog->aux->dst_prog = dst_prog;
 	prog->aux->dev_bound = !!attr->prog_ifindex;
-	prog->aux->sleepable = attr->prog_flags & BPF_F_SLEEPABLE;
 	prog->aux->xdp_has_frags = attr->prog_flags & BPF_F_XDP_HAS_FRAGS;
 
 	/* move token into prog->aux, reuse taken refcnt */
@@ -5512,7 +5512,7 @@ static int bpf_prog_bind_map(union bpf_attr *attr)
 	/* The bpf program will not access the bpf map, but for the sake of
 	 * simplicity, increase sleepable_refcnt for sleepable program as well.
 	 */
-	if (prog->aux->sleepable)
+	if (prog->sleepable)
 		atomic64_inc(&map->sleepable_refcnt);
 	memcpy(used_maps_new, used_maps_old,
 	       sizeof(used_maps_old[0]) * prog->aux->used_map_cnt);
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
index d382f5ebe06c..db7599c59c78 100644
--- a/kernel/bpf/trampoline.c
+++ b/kernel/bpf/trampoline.c
@@ -1014,7 +1014,7 @@ void notrace __bpf_tramp_exit(struct bpf_tramp_image *tr)
 
 bpf_trampoline_enter_t bpf_trampoline_enter(const struct bpf_prog *prog)
 {
-	bool sleepable = prog->aux->sleepable;
+	bool sleepable = prog->sleepable;
 
 	if (bpf_prog_check_recur(prog))
 		return sleepable ? __bpf_prog_enter_sleepable_recur :
@@ -1029,7 +1029,7 @@ bpf_trampoline_enter_t bpf_trampoline_enter(const struct bpf_prog *prog)
 
 bpf_trampoline_exit_t bpf_trampoline_exit(const struct bpf_prog *prog)
 {
-	bool sleepable = prog->aux->sleepable;
+	bool sleepable = prog->sleepable;
 
 	if (bpf_prog_check_recur(prog))
 		return sleepable ? __bpf_prog_exit_sleepable_recur :
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index bf084c693507..7ceee73ff598 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -5273,7 +5273,7 @@ static int map_kptr_match_type(struct bpf_verifier_env *env,
 
 static bool in_sleepable(struct bpf_verifier_env *env)
 {
-	return env->prog->aux->sleepable;
+	return env->prog->sleepable;
 }
 
 /* The non-sleepable programs and sleepable programs with explicit bpf_rcu_read_lock()
@@ -18090,7 +18090,7 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env,
 		return -EINVAL;
 	}
 
-	if (prog->aux->sleepable)
+	if (prog->sleepable)
 		switch (map->map_type) {
 		case BPF_MAP_TYPE_HASH:
 		case BPF_MAP_TYPE_LRU_HASH:
@@ -18277,7 +18277,7 @@ static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env)
 				return -E2BIG;
 			}
 
-			if (env->prog->aux->sleepable)
+			if (env->prog->sleepable)
 				atomic64_inc(&map->sleepable_refcnt);
 			/* hold the map. If the program is rejected by verifier,
 			 * the map will be released by release_maps() or it
@@ -20833,7 +20833,7 @@ int bpf_check_attach_target(struct bpf_verifier_log *log,
 			}
 		}
 
-		if (prog->aux->sleepable) {
+		if (prog->sleepable) {
 			ret = -EINVAL;
 			switch (prog->type) {
 			case BPF_PROG_TYPE_TRACING:
@@ -20944,14 +20944,14 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
 	u64 key;
 
 	if (prog->type == BPF_PROG_TYPE_SYSCALL) {
-		if (prog->aux->sleepable)
+		if (prog->sleepable)
 			/* attach_btf_id checked to be zero already */
 			return 0;
 		verbose(env, "Syscall programs can only be sleepable\n");
 		return -EINVAL;
 	}
 
-	if (prog->aux->sleepable && !can_be_sleepable(prog)) {
+	if (prog->sleepable && !can_be_sleepable(prog)) {
 		verbose(env, "Only fentry/fexit/fmod_ret, lsm, iter, uprobe, and struct_ops programs can be sleepable\n");
 		return -EINVAL;
 	}
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 5ecfa57e3b97..724e6d7e128f 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -10553,7 +10553,7 @@ int perf_event_set_bpf_prog(struct perf_event *event, struct bpf_prog *prog,
 	    (is_syscall_tp && prog->type != BPF_PROG_TYPE_TRACEPOINT))
 		return -EINVAL;
 
-	if (prog->type == BPF_PROG_TYPE_KPROBE && prog->aux->sleepable && !is_uprobe)
+	if (prog->type == BPF_PROG_TYPE_KPROBE && prog->sleepable && !is_uprobe)
 		/* only uprobe programs are allowed to be sleepable */
 		return -EINVAL;
 
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 241ddf5e3895..0a5c4efc73c3 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -3256,7 +3256,7 @@ static int uprobe_prog_run(struct bpf_uprobe *uprobe,
 		.uprobe = uprobe,
 	};
 	struct bpf_prog *prog = link->link.prog;
-	bool sleepable = prog->aux->sleepable;
+	bool sleepable = prog->sleepable;
 	struct bpf_run_ctx *old_run_ctx;
 	int err = 0;
 
diff --git a/net/bpf/bpf_dummy_struct_ops.c b/net/bpf/bpf_dummy_struct_ops.c
index 1b5f812e6972..de33dc1b0daa 100644
--- a/net/bpf/bpf_dummy_struct_ops.c
+++ b/net/bpf/bpf_dummy_struct_ops.c
@@ -174,7 +174,7 @@ static int bpf_dummy_ops_check_member(const struct btf_type *t,
 	case offsetof(struct bpf_dummy_ops, test_sleepable):
 		break;
 	default:
-		if (prog->aux->sleepable)
+		if (prog->sleepable)
 			return -EINVAL;
 	}
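
For context, the BPF_F_SLEEPABLE flag that bpf_prog_load() above translates into prog->sleepable is supplied by user space at load time. Below is a minimal, illustrative user-space sketch using the raw bpf(2) syscall; it is not a complete loader (real sleepable tracing/LSM programs must also set expected_attach_type and attach_btf_id, elided here):

    #include <linux/bpf.h>
    #include <string.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    static int load_sleepable_prog(const struct bpf_insn *insns, __u32 insn_cnt)
    {
            union bpf_attr attr;

            memset(&attr, 0, sizeof(attr));
            attr.prog_type = BPF_PROG_TYPE_TRACING;  /* e.g. an fentry.s program */
            attr.insns = (__u64)(unsigned long)insns;
            attr.insn_cnt = insn_cnt;
            attr.license = (__u64)(unsigned long)"GPL";
            attr.prog_flags = BPF_F_SLEEPABLE;       /* becomes prog->sleepable */

            /* returns a prog fd on success, -1 with errno on failure */
            return syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
    }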