diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -1902,6 +1902,14 @@ __bpf_kfunc void *bpf_obj_new_impl(u64 local_type_id__k, void *meta__ign)
return p;
}
+__bpf_kfunc void *bpf_percpu_obj_new_impl(u64 local_type_id__k, void *meta__ign)
+{
+ u64 size = local_type_id__k;
+
+ /* The verifier has ensured that meta__ign must be NULL */
+ return bpf_mem_alloc(&bpf_global_percpu_ma, size);
+}
+
/* Must be called under migrate_disable(), as required by bpf_mem_free */
void __bpf_obj_drop_impl(void *p, const struct btf_record *rec)
{
@@ -1930,6 +1938,12 @@ __bpf_kfunc void bpf_obj_drop_impl(void *p__alloc, void *meta__ign)
__bpf_obj_drop_impl(p, meta ? meta->record : NULL);
}
+__bpf_kfunc void bpf_percpu_obj_drop_impl(void *p__alloc, void *meta__ign)
+{
+ /* The verifier has ensured that meta__ign must be NULL */
+ bpf_mem_free_rcu(&bpf_global_percpu_ma, p__alloc);
+}
+
__bpf_kfunc void *bpf_refcount_acquire_impl(void *p__refcounted_kptr, void *meta__ign)
{
struct btf_struct_meta *meta = meta__ign;
@@ -2442,7 +2456,9 @@ BTF_SET8_START(generic_btf_ids)
BTF_ID_FLAGS(func, crash_kexec, KF_DESTRUCTIVE)
#endif
BTF_ID_FLAGS(func, bpf_obj_new_impl, KF_ACQUIRE | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_percpu_obj_new_impl, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_obj_drop_impl, KF_RELEASE)
+BTF_ID_FLAGS(func, bpf_percpu_obj_drop_impl, KF_RELEASE)
BTF_ID_FLAGS(func, bpf_refcount_acquire_impl, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_list_push_front_impl)
BTF_ID_FLAGS(func, bpf_list_push_back_impl)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -304,7 +304,7 @@ struct bpf_kfunc_call_arg_meta {
/* arg_{btf,btf_id,owning_ref} are used by kfunc-specific handling,
* generally to pass info about user-defined local kptr types to later
* verification logic
- * bpf_obj_drop
+ * bpf_obj_drop/bpf_percpu_obj_drop
* Record the local kptr type to be drop'd
* bpf_refcount_acquire (via KF_ARG_PTR_TO_REFCOUNTED_KPTR arg type)
* Record the local kptr type to be refcount_incr'd and use
@@ -5001,6 +5001,8 @@ static int map_kptr_match_type(struct bpf_verifier_env *env,
perm_flags |= PTR_UNTRUSTED;
} else {
perm_flags = PTR_MAYBE_NULL | MEM_ALLOC;
+ if (kptr_field->type == BPF_KPTR_PERCPU)
+ perm_flags |= MEM_PERCPU;
}
if (base_type(reg->type) != PTR_TO_BTF_ID || (type_flag(reg->type) & ~perm_flags))
@@ -5044,7 +5046,7 @@ static int map_kptr_match_type(struct bpf_verifier_env *env,
*/
if (!btf_struct_ids_match(&env->log, reg->btf, reg->btf_id, reg->off,
kptr_field->kptr.btf, kptr_field->kptr.btf_id,
- kptr_field->type == BPF_KPTR_REF))
+ kptr_field->type != BPF_KPTR_UNREF))
goto bad_type;
return 0;
bad_type:
@@ -5088,7 +5090,18 @@ static bool rcu_safe_kptr(const struct btf_field *field)
{
const struct btf_field_kptr *kptr = &field->kptr;
- return field->type == BPF_KPTR_REF && rcu_protected_object(kptr->btf, kptr->btf_id);
+ return field->type == BPF_KPTR_PERCPU ||
+ (field->type == BPF_KPTR_REF && rcu_protected_object(kptr->btf, kptr->btf_id));
+}
+
+static u32 btf_ld_kptr_type(struct bpf_verifier_env *env, struct btf_field *kptr_field)
+{
+ if (rcu_safe_kptr(kptr_field) && in_rcu_cs(env)) {
+ if (kptr_field->type != BPF_KPTR_PERCPU)
+ return PTR_MAYBE_NULL | MEM_RCU;
+ return PTR_MAYBE_NULL | MEM_RCU | MEM_PERCPU;
+ }
+ return PTR_MAYBE_NULL | PTR_UNTRUSTED;
}
static int check_map_kptr_access(struct bpf_verifier_env *env, u32 regno,
@@ -5114,7 +5127,8 @@ static int check_map_kptr_access(struct bpf_verifier_env *env, u32 regno,
/* We only allow loading referenced kptr, since it will be marked as
* untrusted, similar to unreferenced kptr.
*/
- if (class != BPF_LDX && kptr_field->type == BPF_KPTR_REF) {
+ if (class != BPF_LDX &&
+ (kptr_field->type == BPF_KPTR_REF || kptr_field->type == BPF_KPTR_PERCPU)) {
verbose(env, "store to referenced kptr disallowed\n");
return -EACCES;
}
@@ -5125,10 +5139,7 @@ static int check_map_kptr_access(struct bpf_verifier_env *env, u32 regno,
* value from map as PTR_TO_BTF_ID, with the correct type.
*/
mark_btf_ld_reg(env, cur_regs(env), value_regno, PTR_TO_BTF_ID, kptr_field->kptr.btf,
- kptr_field->kptr.btf_id,
- rcu_safe_kptr(kptr_field) && in_rcu_cs(env) ?
- PTR_MAYBE_NULL | MEM_RCU :
- PTR_MAYBE_NULL | PTR_UNTRUSTED);
+ kptr_field->kptr.btf_id, btf_ld_kptr_type(env, kptr_field));
/* For mark_ptr_or_null_reg */
val_reg->id = ++env->id_gen;
} else if (class == BPF_STX) {
@@ -5182,6 +5193,7 @@ static int check_map_access(struct bpf_verifier_env *env, u32 regno,
switch (field->type) {
case BPF_KPTR_UNREF:
case BPF_KPTR_REF:
+ case BPF_KPTR_PERCPU:
if (src != ACCESS_DIRECT) {
verbose(env, "kptr cannot be accessed indirectly by helper\n");
return -EACCES;
@@ -7320,7 +7332,7 @@ static int process_kptr_func(struct bpf_verifier_env *env, int regno,
verbose(env, "off=%d doesn't point to kptr\n", kptr_off);
return -EACCES;
}
- if (kptr_field->type != BPF_KPTR_REF) {
+ if (kptr_field->type != BPF_KPTR_REF && kptr_field->type != BPF_KPTR_PERCPU) {
verbose(env, "off=%d kptr isn't referenced kptr\n", kptr_off);
return -EACCES;
}
@@ -7831,8 +7843,10 @@ static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
if (base_type(arg_type) == ARG_PTR_TO_MEM)
type &= ~DYNPTR_TYPE_FLAG_MASK;
- if (meta->func_id == BPF_FUNC_kptr_xchg && type_is_alloc(type))
+ if (meta->func_id == BPF_FUNC_kptr_xchg && type_is_alloc(type)) {
type &= ~MEM_ALLOC;
+ type &= ~MEM_PERCPU;
+ }
for (i = 0; i < ARRAY_SIZE(compatible->types); i++) {
expected = compatible->types[i];
@@ -7915,6 +7929,7 @@ static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
break;
}
case PTR_TO_BTF_ID | MEM_ALLOC:
+ case PTR_TO_BTF_ID | MEM_PERCPU | MEM_ALLOC:
if (meta->func_id != BPF_FUNC_spin_lock && meta->func_id != BPF_FUNC_spin_unlock &&
meta->func_id != BPF_FUNC_kptr_xchg) {
verbose(env, "verifier internal error: unimplemented handling of MEM_ALLOC\n");
@@ -9882,8 +9897,11 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
if (func_id == BPF_FUNC_kptr_xchg) {
ret_btf = meta.kptr_field->kptr.btf;
ret_btf_id = meta.kptr_field->kptr.btf_id;
- if (!btf_is_kernel(ret_btf))
+ if (!btf_is_kernel(ret_btf)) {
regs[BPF_REG_0].type |= MEM_ALLOC;
+ if (meta.kptr_field->type == BPF_KPTR_PERCPU)
+ regs[BPF_REG_0].type |= MEM_PERCPU;
+ }
} else {
if (fn->ret_btf_id == BPF_PTR_POISON) {
verbose(env, "verifier internal error:");
@@ -10268,6 +10286,8 @@ enum special_kfunc_type {
KF_bpf_dynptr_slice,
KF_bpf_dynptr_slice_rdwr,
KF_bpf_dynptr_clone,
+ KF_bpf_percpu_obj_new_impl,
+ KF_bpf_percpu_obj_drop_impl,
};
BTF_SET_START(special_kfunc_set)
@@ -10288,6 +10308,8 @@ BTF_ID(func, bpf_dynptr_from_xdp)
BTF_ID(func, bpf_dynptr_slice)
BTF_ID(func, bpf_dynptr_slice_rdwr)
BTF_ID(func, bpf_dynptr_clone)
+BTF_ID(func, bpf_percpu_obj_new_impl)
+BTF_ID(func, bpf_percpu_obj_drop_impl)
BTF_SET_END(special_kfunc_set)
BTF_ID_LIST(special_kfunc_list)
@@ -10310,6 +10332,8 @@ BTF_ID(func, bpf_dynptr_from_xdp)
BTF_ID(func, bpf_dynptr_slice)
BTF_ID(func, bpf_dynptr_slice_rdwr)
BTF_ID(func, bpf_dynptr_clone)
+BTF_ID(func, bpf_percpu_obj_new_impl)
+BTF_ID(func, bpf_percpu_obj_drop_impl)
static bool is_kfunc_ret_null(struct bpf_kfunc_call_arg_meta *meta)
{
@@ -11004,7 +11028,17 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
}
break;
case KF_ARG_PTR_TO_ALLOC_BTF_ID:
- if (reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
+ if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC)) {
+ if (meta->func_id != special_kfunc_list[KF_bpf_obj_drop_impl]) {
+ verbose(env, "arg#%d expected for bpf_obj_drop_impl()\n", i);
+ return -EINVAL;
+ }
+ } else if (reg->type == (PTR_TO_BTF_ID | MEM_ALLOC | MEM_PERCPU)) {
+ if (meta->func_id != special_kfunc_list[KF_bpf_percpu_obj_drop_impl]) {
+ verbose(env, "arg#%d expected for bpf_percpu_obj_drop_impl()\n", i);
+ return -EINVAL;
+ }
+ } else {
verbose(env, "arg#%d expected pointer to allocated object\n", i);
return -EINVAL;
}
@@ -11012,8 +11046,7 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
verbose(env, "allocated object must be referenced\n");
return -EINVAL;
}
- if (meta->btf == btf_vmlinux &&
- meta->func_id == special_kfunc_list[KF_bpf_obj_drop_impl]) {
+ if (meta->btf == btf_vmlinux) {
meta->arg_btf = reg->btf;
meta->arg_btf_id = reg->btf_id;
}
@@ -11413,6 +11446,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
/* Only exception is bpf_obj_new_impl */
if (meta.btf != btf_vmlinux ||
(meta.func_id != special_kfunc_list[KF_bpf_obj_new_impl] &&
+ meta.func_id != special_kfunc_list[KF_bpf_percpu_obj_new_impl] &&
meta.func_id != special_kfunc_list[KF_bpf_refcount_acquire_impl])) {
verbose(env, "acquire kernel function does not return PTR_TO_BTF_ID\n");
return -EINVAL;
@@ -11426,11 +11460,16 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
ptr_type = btf_type_skip_modifiers(desc_btf, t->type, &ptr_type_id);
if (meta.btf == btf_vmlinux && btf_id_set_contains(&special_kfunc_set, meta.func_id)) {
- if (meta.func_id == special_kfunc_list[KF_bpf_obj_new_impl]) {
+ if (meta.func_id == special_kfunc_list[KF_bpf_obj_new_impl] ||
+ meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl]) {
+ struct btf_struct_meta *struct_meta;
struct btf *ret_btf;
u32 ret_btf_id;
- if (unlikely(!bpf_global_ma_set))
+ if (meta.func_id == special_kfunc_list[KF_bpf_obj_new_impl] && !bpf_global_ma_set)
+ return -ENOMEM;
+
+ if (meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl] && !bpf_global_percpu_ma_set)
return -ENOMEM;
if (((u64)(u32)meta.arg_constant.value) != meta.arg_constant.value) {
@@ -11443,24 +11482,38 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
/* This may be NULL due to user not supplying a BTF */
if (!ret_btf) {
- verbose(env, "bpf_obj_new requires prog BTF\n");
+ verbose(env, "bpf_obj_new/bpf_percpu_obj_new requires prog BTF\n");
return -EINVAL;
}
ret_t = btf_type_by_id(ret_btf, ret_btf_id);
if (!ret_t || !__btf_type_is_struct(ret_t)) {
- verbose(env, "bpf_obj_new type ID argument must be of a struct\n");
+ verbose(env, "bpf_obj_new/bpf_percpu_obj_new type ID argument must be of a struct\n");
return -EINVAL;
}
+ struct_meta = btf_find_struct_meta(ret_btf, ret_btf_id);
+ if (meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl]) {
+ if (!__btf_type_is_scalar_struct(env, ret_btf, ret_t, 0)) {
+ verbose(env, "bpf_percpu_obj_new type ID argument must be of a struct of scalars\n");
+ return -EINVAL;
+ }
+
+ if (struct_meta) {
+ verbose(env, "bpf_percpu_obj_new type ID argument must not contain special fields\n");
+ return -EINVAL;
+ }
+ }
+
mark_reg_known_zero(env, regs, BPF_REG_0);
regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC;
regs[BPF_REG_0].btf = ret_btf;
regs[BPF_REG_0].btf_id = ret_btf_id;
+ if (meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl])
+ regs[BPF_REG_0].type |= MEM_PERCPU;
insn_aux->obj_new_size = ret_t->size;
- insn_aux->kptr_struct_meta =
- btf_find_struct_meta(ret_btf, ret_btf_id);
+ insn_aux->kptr_struct_meta = struct_meta;
} else if (meta.func_id == special_kfunc_list[KF_bpf_refcount_acquire_impl]) {
mark_reg_known_zero(env, regs, BPF_REG_0);
regs[BPF_REG_0].type = PTR_TO_BTF_ID | MEM_ALLOC;
@@ -11597,7 +11650,8 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
regs[BPF_REG_0].id = ++env->id_gen;
} else if (btf_type_is_void(t)) {
if (meta.btf == btf_vmlinux && btf_id_set_contains(&special_kfunc_set, meta.func_id)) {
- if (meta.func_id == special_kfunc_list[KF_bpf_obj_drop_impl]) {
+ if (meta.func_id == special_kfunc_list[KF_bpf_obj_drop_impl] ||
+ meta.func_id == special_kfunc_list[KF_bpf_percpu_obj_drop_impl]) {
insn_aux->kptr_struct_meta =
btf_find_struct_meta(meta.arg_btf,
meta.arg_btf_id);
@@ -18266,21 +18320,35 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
insn->imm = BPF_CALL_IMM(desc->addr);
if (insn->off)
return 0;
- if (desc->func_id == special_kfunc_list[KF_bpf_obj_new_impl]) {
+ if (desc->func_id == special_kfunc_list[KF_bpf_obj_new_impl] ||
+ desc->func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl]) {
struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta;
struct bpf_insn addr[2] = { BPF_LD_IMM64(BPF_REG_2, (long)kptr_struct_meta) };
u64 obj_new_size = env->insn_aux_data[insn_idx].obj_new_size;
+ if (desc->func_id == special_kfunc_list[KF_bpf_percpu_obj_new_impl] && kptr_struct_meta) {
+ verbose(env, "verifier internal error: NULL kptr_struct_meta expected at insn_idx %d\n",
+ insn_idx);
+ return -EFAULT;
+ }
+
insn_buf[0] = BPF_MOV64_IMM(BPF_REG_1, obj_new_size);
insn_buf[1] = addr[0];
insn_buf[2] = addr[1];
insn_buf[3] = *insn;
*cnt = 4;
} else if (desc->func_id == special_kfunc_list[KF_bpf_obj_drop_impl] ||
+ desc->func_id == special_kfunc_list[KF_bpf_percpu_obj_drop_impl] ||
desc->func_id == special_kfunc_list[KF_bpf_refcount_acquire_impl]) {
struct btf_struct_meta *kptr_struct_meta = env->insn_aux_data[insn_idx].kptr_struct_meta;
struct bpf_insn addr[2] = { BPF_LD_IMM64(BPF_REG_2, (long)kptr_struct_meta) };
+ if (desc->func_id == special_kfunc_list[KF_bpf_percpu_obj_drop_impl] && kptr_struct_meta) {
+ verbose(env, "verifier internal error: NULL kptr_struct_meta expected at insn_idx %d\n",
+ insn_idx);
+ return -EFAULT;
+ }
+
if (desc->func_id == special_kfunc_list[KF_bpf_refcount_acquire_impl] &&
!kptr_struct_meta) {
verbose(env, "verifier internal error: kptr_struct_meta expected at insn_idx %d\n",
Add two new kfunc's, bpf_percpu_obj_new_impl() and bpf_percpu_obj_drop_impl(), to allocate a percpu obj. Two functions are very similar to bpf_obj_new_impl() and bpf_obj_drop_impl(). The major difference is related to percpu handling. bpf_rcu_read_lock() struct val_t __percpu_kptr *v = map_val->percpu_data; ... bpf_rcu_read_unlock() For a percpu data map_val like above 'v', the reg->type is set as PTR_TO_BTF_ID | MEM_PERCPU | MEM_RCU if inside rcu critical section. MEM_RCU marking here is similar to NON_OWN_REF as 'v' is not a owning reference. But NON_OWN_REF is trusted and typically inside the spinlock while MEM_RCU is under rcu read lock. RCU is preferred here since percpu data structures mean potential concurrent access into its contents. Also, bpf_percpu_obj_new_impl() is restricted such that no pointers or special fields are allowed. Therefore, the bpf_list_head and bpf_rb_root will not be supported in this patch set to avoid potential memory leak issue due to racing between bpf_obj_free_fields() and another bpf_kptr_xchg() moving an allocated object to bpf_list_head and bpf_rb_root. Signed-off-by: Yonghong Song <yonghong.song@linux.dev> --- kernel/bpf/helpers.c | 16 ++++++ kernel/bpf/verifier.c | 112 +++++++++++++++++++++++++++++++++--------- 2 files changed, 106 insertions(+), 22 deletions(-)