| Message ID | 4ed563d21d1e34508a347a5b91a8089cbbae47c2.1727914243.git.dxu@dxuuu.xyz (mailing list archive) |
|---|---|
| State | New |
| Series | Support eliding map lookup nullness |

On Wed, Oct 2, 2024 at 5:12 PM Daniel Xu <dxu@dxuuu.xyz> wrote:
>
> This commit allows progs to elide a null check on statically known map
> lookup keys. In other words, if the verifier can statically prove that
> the lookup will be in-bounds, allow the prog to drop the null check.
>
> This is useful for two reasons:
>
> 1. Large numbers of nullness checks (especially when they cannot fail)
>    unnecessarily push the prog towards BPF_COMPLEXITY_LIMIT_JMP_SEQ.
> 2. It forms a tighter contract between programmer and verifier.
>
> For (1), bpftrace is starting to make heavier use of percpu scratch
> maps. As a result, for user scripts with a large number of unrolled
> loops, we are starting to hit jump complexity verification errors.
> These percpu lookups cannot fail anyway, as we only use static key
> values. Eliding nullness probably results in less work for the
> verifier as well.
>
> For (2), percpu scratch maps are often used as a larger stack, as the
> current stack is limited to 512 bytes. In these situations, it is
> desirable for the programmer to express: "this lookup should never
> fail, and if it does, it means I messed up the code". By omitting the
> null check, the programmer can "ask" the verifier to double check the
> logic.
>
> Tests also have to be updated in sync with these changes, as the
> verifier is more efficient with this change. Notably, the iters.c
> tests had to be changed to use a map type that still requires null
> checks, as they exercise verifier tracking logic w.r.t. iterators.
>
> Acked-by: Eduard Zingerman <eddyz87@gmail.com>
> Signed-off-by: Daniel Xu <dxu@dxuuu.xyz>
> ---
>  kernel/bpf/verifier.c                         | 73 ++++++++++++++++++-
>  tools/testing/selftests/bpf/progs/iters.c     | 14 ++--
>  .../selftests/bpf/progs/map_kptr_fail.c       |  2 +-
>  .../selftests/bpf/progs/verifier_map_in_map.c |  2 +-
>  .../testing/selftests/bpf/verifier/map_kptr.c |  2 +-
>  5 files changed, 82 insertions(+), 11 deletions(-)
>
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index 7d9b38ffd220..9ee2cd6c0508 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -284,6 +284,7 @@ struct bpf_call_arg_meta {
>         u32 ret_btf_id;
>         u32 subprogno;
>         struct btf_field *kptr_field;
> +       long const_map_key;

A valid key might collide with the special -1 on 32-bit archs, where
long is 32 bits. s64 instead?

> };
>
> struct bpf_kfunc_call_arg_meta {
> @@ -10416,6 +10417,60 @@ static void update_loop_inline_state(struct bpf_verifier_env *env, u32 subprogno
>         state->callback_subprogno == subprogno);
> }
>
> +/* Returns whether or not the given map type can potentially elide
> + * lookup return value nullness check. This is possible if the key
> + * is statically known.
> + */
> +static bool can_elide_value_nullness(enum bpf_map_type type)
> +{
> +       switch (type) {
> +       case BPF_MAP_TYPE_ARRAY:
> +       case BPF_MAP_TYPE_PERCPU_ARRAY:
> +               return true;
> +       default:
> +               return false;
> +       }
> +}
> +
> +/* Returns constant key value if possible, else -1 */
> +static long get_constant_map_key(struct bpf_verifier_env *env,
> +                                struct bpf_reg_state *key)
> +{
> +       struct bpf_func_state *state = func(env, key);
> +       struct bpf_reg_state *reg;
> +       int stack_off;
> +       int slot;
> +       int spi;
> +
> +       if (!env->bpf_capable)
> +               return -1;
> +       if (key->type != PTR_TO_STACK)
> +               return -1;
> +       if (!tnum_is_const(key->var_off))
> +               return -1;
> +
> +       stack_off = key->off + key->var_off.value;
> +       slot = -stack_off - 1;
> +       if (slot < 0)
> +               /* Stack grew upwards. This is properly checked
> +                * as part of helper argument processing, but since
> +                * this runs before those checks, we need to preemptively
> +                * do this here.
> +                */
> +               return -1;
> +       else if (slot >= state->allocated_stack)
> +               /* Stack uninitialized */
> +               return -1;
> +
> +       spi = slot / BPF_REG_SIZE;
> +       reg = &state->stack[spi].spilled_ptr;

A reg->type == SCALAR_VALUE check is missing, and slot_type should
also be checked: spilled_ptr is only correct for STACK_SPILL. It
should also recognize that STACK_ZERO means zero.

> +       if (!tnum_is_const(reg->var_off))
> +               /* Stack value not statically known */
> +               return -1;

I suspect tnum_is_const() could pass for pointers and other !scalar.

> +
> +       return reg->var_off.value;

and this will be zero by accident. Bits of
check_stack_read_fixed_off() should probably be factored out and used
here.

> +}
> +
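Folded together, those suggestions might look roughly like the sketch
below. This is illustrative only, not an actual follow-up patch: it
assumes the existing verifier.c helpers func(), tnum_is_const() and
is_spilled_scalar_reg(), checks a single slot_type byte where a
complete patch would validate every byte of the key, and keeps the
ambiguous -1 sentinel the review points out.

```c
/* Sketch only: require a spilled constant scalar, and treat a
 * STACK_ZERO slot as the constant 0. Returns s64 per the suggestion
 * above; note -1 can still collide with a spilled -1, so a real fix
 * might report "no constant key" via a separate out-parameter.
 */
static s64 get_constant_map_key(struct bpf_verifier_env *env,
				struct bpf_reg_state *key)
{
	struct bpf_func_state *state = func(env, key);
	struct bpf_reg_state *reg;
	int stack_off, slot, spi;

	if (!env->bpf_capable)
		return -1;
	if (key->type != PTR_TO_STACK)
		return -1;
	if (!tnum_is_const(key->var_off))
		return -1;

	stack_off = key->off + key->var_off.value;
	slot = -stack_off - 1;
	if (slot < 0 || slot >= state->allocated_stack)
		return -1;

	spi = slot / BPF_REG_SIZE;

	/* STACK_ZERO means the slot holds a known zero even though no
	 * register was spilled there. (Simplified: checks one byte; a
	 * complete patch would check every byte of the key.)
	 */
	if (state->stack[spi].slot_type[slot % BPF_REG_SIZE] == STACK_ZERO)
		return 0;

	/* Only a full spill of a known-constant scalar qualifies:
	 * spilled_ptr is meaningless for other slot types, and
	 * tnum_is_const() alone can pass for pointers too.
	 */
	if (!is_spilled_scalar_reg(&state->stack[spi]))
		return -1;

	reg = &state->stack[spi].spilled_ptr;
	if (!tnum_is_const(reg->var_off))
		return -1;

	return reg->var_off.value;
}
```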
>  static int get_helper_proto(struct bpf_verifier_env *env, int func_id,
>                              const struct bpf_func_proto **ptr)
>  {
> @@ -10513,6 +10568,15 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
>                         env->insn_aux_data[insn_idx].storage_get_func_atomic = true;
>         }
>
> +       /* Logically we are trying to check on key register state before
> +        * the helper is called, so process here. Otherwise argument processing
> +        * may clobber the spilled key values.
> +        */
> +       regs = cur_regs(env);
> +       if (func_id == BPF_FUNC_map_lookup_elem)
> +               meta.const_map_key = get_constant_map_key(env, &regs[BPF_REG_2]);
> +
> +

I think the logic is too specific. Let's make it more generic: it's
probably better to do this later, in "case ARG_PTR_TO_MAP_KEY:", after
check_helper_mem_access() returns 0, and store the result into
meta.const_map_key there.

>         meta.func_id = func_id;
>         /* check args */
>         for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) {
> @@ -10773,10 +10837,17 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
>                                    "kernel subsystem misconfigured verifier\n");
>                         return -EINVAL;
>                 }
> +
> +               if (func_id == BPF_FUNC_map_lookup_elem &&

and only check for func_id here. Saving meta.const_map_key can be
unconditional for all ARG_PTR_TO_MAP_KEY. We can use it in a follow-up
to optimize lookup/update from arrays: currently array_map_gen_lookup()
still emits a load from the key and a bounds check. All of that can be
optimized away with const_map_key.

> +                   can_elide_value_nullness(meta.map_ptr->map_type) &&
> +                   meta.const_map_key >= 0 &&
> +                   meta.const_map_key < meta.map_ptr->max_entries)
> +                       ret_flag &= ~PTR_MAYBE_NULL;
> +
>                 regs[BPF_REG_0].map_ptr = meta.map_ptr;
>                 regs[BPF_REG_0].map_uid = meta.map_uid;
>                 regs[BPF_REG_0].type = PTR_TO_MAP_VALUE | ret_flag;
> -               if (!type_may_be_null(ret_type) &&
> +               if (!type_may_be_null(regs[BPF_REG_0].type) &&

I would use ret_flag here instead of R0.type.

pw-bot: cr
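Concretely, the more generic placement suggested in the review might
look roughly like this inside check_func_arg()'s ARG_PTR_TO_MAP_KEY
case. The context lines are approximated from the verifier.c of that
era (including the check_helper_mem_access() argument list), so treat
this as a sketch rather than an actual patch:

```diff
 	case ARG_PTR_TO_MAP_KEY:
 		/* bpf_map_xxx(..., map_ptr, ..., key) call:
 		 * check that [key, key + map->key_size) are within
 		 * stack limits and initialized
 		 */
 		err = check_helper_mem_access(env, regno,
 					      meta->map_ptr->key_size,
 					      BPF_READ, false, NULL);
+		if (!err)
+			/* unconditional for every ARG_PTR_TO_MAP_KEY;
+			 * consumers (nullness elision, array lookup
+			 * inlining) decide what to do with it
+			 */
+			meta->const_map_key = get_constant_map_key(env, reg);
 		break;
```

The posted patch (below) instead computes the key up front in
check_helper_call(), which is what the comment above pushes back on.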
```diff
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 7d9b38ffd220..9ee2cd6c0508 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -284,6 +284,7 @@ struct bpf_call_arg_meta {
 	u32 ret_btf_id;
 	u32 subprogno;
 	struct btf_field *kptr_field;
+	long const_map_key;
 };
 
 struct bpf_kfunc_call_arg_meta {
@@ -10416,6 +10417,60 @@ static void update_loop_inline_state(struct bpf_verifier_env *env, u32 subprogno
 		 state->callback_subprogno == subprogno);
 }
 
+/* Returns whether or not the given map type can potentially elide
+ * lookup return value nullness check. This is possible if the key
+ * is statically known.
+ */
+static bool can_elide_value_nullness(enum bpf_map_type type)
+{
+	switch (type) {
+	case BPF_MAP_TYPE_ARRAY:
+	case BPF_MAP_TYPE_PERCPU_ARRAY:
+		return true;
+	default:
+		return false;
+	}
+}
+
+/* Returns constant key value if possible, else -1 */
+static long get_constant_map_key(struct bpf_verifier_env *env,
+				 struct bpf_reg_state *key)
+{
+	struct bpf_func_state *state = func(env, key);
+	struct bpf_reg_state *reg;
+	int stack_off;
+	int slot;
+	int spi;
+
+	if (!env->bpf_capable)
+		return -1;
+	if (key->type != PTR_TO_STACK)
+		return -1;
+	if (!tnum_is_const(key->var_off))
+		return -1;
+
+	stack_off = key->off + key->var_off.value;
+	slot = -stack_off - 1;
+	if (slot < 0)
+		/* Stack grew upwards. This is properly checked
+		 * as part of helper argument processing, but since
+		 * this runs before those checks, we need to preemptively
+		 * do this here.
+		 */
+		return -1;
+	else if (slot >= state->allocated_stack)
+		/* Stack uninitialized */
+		return -1;
+
+	spi = slot / BPF_REG_SIZE;
+	reg = &state->stack[spi].spilled_ptr;
+	if (!tnum_is_const(reg->var_off))
+		/* Stack value not statically known */
+		return -1;
+
+	return reg->var_off.value;
+}
+
 static int get_helper_proto(struct bpf_verifier_env *env, int func_id,
 			    const struct bpf_func_proto **ptr)
 {
@@ -10513,6 +10568,15 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
 			env->insn_aux_data[insn_idx].storage_get_func_atomic = true;
 	}
 
+	/* Logically we are trying to check on key register state before
+	 * the helper is called, so process here. Otherwise argument processing
+	 * may clobber the spilled key values.
+	 */
+	regs = cur_regs(env);
+	if (func_id == BPF_FUNC_map_lookup_elem)
+		meta.const_map_key = get_constant_map_key(env, &regs[BPF_REG_2]);
+
+
 	meta.func_id = func_id;
 	/* check args */
 	for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) {
@@ -10773,10 +10837,17 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
 				"kernel subsystem misconfigured verifier\n");
 			return -EINVAL;
 		}
+
+		if (func_id == BPF_FUNC_map_lookup_elem &&
+		    can_elide_value_nullness(meta.map_ptr->map_type) &&
+		    meta.const_map_key >= 0 &&
+		    meta.const_map_key < meta.map_ptr->max_entries)
+			ret_flag &= ~PTR_MAYBE_NULL;
+
 		regs[BPF_REG_0].map_ptr = meta.map_ptr;
 		regs[BPF_REG_0].map_uid = meta.map_uid;
 		regs[BPF_REG_0].type = PTR_TO_MAP_VALUE | ret_flag;
-		if (!type_may_be_null(ret_type) &&
+		if (!type_may_be_null(regs[BPF_REG_0].type) &&
 		    btf_record_has_field(meta.map_ptr->record, BPF_SPIN_LOCK)) {
 			regs[BPF_REG_0].id = ++env->id_gen;
 		}
diff --git a/tools/testing/selftests/bpf/progs/iters.c b/tools/testing/selftests/bpf/progs/iters.c
index ef70b88bccb2..24e6cd946396 100644
--- a/tools/testing/selftests/bpf/progs/iters.c
+++ b/tools/testing/selftests/bpf/progs/iters.c
@@ -524,11 +524,11 @@ int iter_subprog_iters(const void *ctx)
 }
 
 struct {
-	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(type, BPF_MAP_TYPE_HASH);
 	__type(key, int);
 	__type(value, int);
 	__uint(max_entries, 1000);
-} arr_map SEC(".maps");
+} hash_map SEC(".maps");
 
 SEC("?raw_tp")
 __failure __msg("invalid mem access 'scalar'")
@@ -539,7 +539,7 @@ int iter_err_too_permissive1(const void *ctx)
 
 	MY_PID_GUARD();
 
-	map_val = bpf_map_lookup_elem(&arr_map, &key);
+	map_val = bpf_map_lookup_elem(&hash_map, &key);
 	if (!map_val)
 		return 0;
 
@@ -561,12 +561,12 @@ int iter_err_too_permissive2(const void *ctx)
 
 	MY_PID_GUARD();
 
-	map_val = bpf_map_lookup_elem(&arr_map, &key);
+	map_val = bpf_map_lookup_elem(&hash_map, &key);
 	if (!map_val)
 		return 0;
 
 	bpf_repeat(1000000) {
-		map_val = bpf_map_lookup_elem(&arr_map, &key);
+		map_val = bpf_map_lookup_elem(&hash_map, &key);
 	}
 	*map_val = 123;
 
@@ -585,7 +585,7 @@ int iter_err_too_permissive3(const void *ctx)
 	MY_PID_GUARD();
 
 	bpf_repeat(1000000) {
-		map_val = bpf_map_lookup_elem(&arr_map, &key);
+		map_val = bpf_map_lookup_elem(&hash_map, &key);
 		found = true;
 	}
 
@@ -606,7 +606,7 @@ int iter_tricky_but_fine(const void *ctx)
 	MY_PID_GUARD();
 
 	bpf_repeat(1000000) {
-		map_val = bpf_map_lookup_elem(&arr_map, &key);
+		map_val = bpf_map_lookup_elem(&hash_map, &key);
 		if (map_val) {
 			found = true;
 			break;
diff --git a/tools/testing/selftests/bpf/progs/map_kptr_fail.c b/tools/testing/selftests/bpf/progs/map_kptr_fail.c
index 450bb373b179..c4a81d1c1354 100644
--- a/tools/testing/selftests/bpf/progs/map_kptr_fail.c
+++ b/tools/testing/selftests/bpf/progs/map_kptr_fail.c
@@ -345,7 +345,7 @@ int reject_indirect_global_func_access(struct __sk_buff *ctx)
 }
 
 SEC("?tc")
-__failure __msg("Unreleased reference id=5 alloc_insn=")
+__failure __msg("Unreleased reference id=4 alloc_insn=")
 int kptr_xchg_ref_state(struct __sk_buff *ctx)
 {
 	struct prog_test_ref_kfunc *p;
diff --git a/tools/testing/selftests/bpf/progs/verifier_map_in_map.c b/tools/testing/selftests/bpf/progs/verifier_map_in_map.c
index 4eaab1468eb7..7d088ba99ea5 100644
--- a/tools/testing/selftests/bpf/progs/verifier_map_in_map.c
+++ b/tools/testing/selftests/bpf/progs/verifier_map_in_map.c
@@ -47,7 +47,7 @@ l0_%=:	r0 = 0;					\
 
 SEC("xdp")
 __description("map in map state pruning")
-__success __msg("processed 26 insns")
+__success __msg("processed 15 insns")
 __log_level(2) __retval(0)
 __flag(BPF_F_TEST_STATE_FREQ)
 __naked void map_in_map_state_pruning(void)
 {
diff --git a/tools/testing/selftests/bpf/verifier/map_kptr.c b/tools/testing/selftests/bpf/verifier/map_kptr.c
index f420c0312aa0..4b39f8472f9b 100644
--- a/tools/testing/selftests/bpf/verifier/map_kptr.c
+++ b/tools/testing/selftests/bpf/verifier/map_kptr.c
@@ -373,7 +373,7 @@
 	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	.fixup_map_kptr = { 1 },
 	.result = REJECT,
-	.errstr = "Unreleased reference id=5 alloc_insn=20",
+	.errstr = "Unreleased reference id=4 alloc_insn=20",
 	.fixup_kfunc_btf_id = {
 		{ "bpf_kfunc_call_test_acquire", 15 },
 	}
```
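To see what the elision buys from the program side, the percpu
scratch-map pattern described in the commit message reduces to roughly
the following. This is a sketch: the map, value type, and function
names are illustrative, and dropping the NULL check only becomes legal
once this series (or a later revision of it) lands.

```c
// Sketch of the bpftrace-style scratch map pattern the series targets.
// Key 0 is statically known and provably < max_entries, so under the
// proposed semantics the verifier can prove the lookup never fails.
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct scratch_val {			/* illustrative value type */
	__u64 counter;
	char buf[256];
};

struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__type(key, __u32);
	__type(value, struct scratch_val);
	__uint(max_entries, 1);
} scratch SEC(".maps");		/* illustrative map name */

SEC("raw_tp/sys_enter")
int use_scratch(void *ctx)
{
	__u32 key = 0;			/* constant, in-bounds key */
	struct scratch_val *val;

	val = bpf_map_lookup_elem(&scratch, &key);
	/* With the proposed elision, the usual `if (!val) return 0;`
	 * can be omitted here: the verifier knows val is non-NULL.
	 * Without the series, this dereference is rejected.
	 */
	val->counter++;
	return 0;
}

char _license[] SEC("license") = "GPL";
```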