Message ID | 20230912070149.969939-3-zhouchuyi@bytedance.com (mailing list archive) |
---|---|
State | Changes Requested |
Delegated to: | BPF |
Headers | show |
Series | Add Open-coded process and css iters | expand |
On Tue, Sep 12, 2023 at 03:01:45PM +0800, Chuyi Zhou wrote: > This patch adds kfuncs bpf_iter_css_task_{new,next,destroy} which allow > creation and manipulation of struct bpf_iter_css_task in open-coded > iterator style. These kfuncs actually wrapps css_task_iter_{start,next, > end}. BPF programs can use these kfuncs through bpf_for_each macro for > iteration of all tasks under a css. > > css_task_iter_*() would try to get the global spin-lock *css_set_lock*, so > the bpf side has to be careful in where it allows to use this iter. > Currently we only allow it in bpf_lsm and bpf iter-s. > > Signed-off-by: Chuyi Zhou <zhouchuyi@bytedance.com> > --- > include/uapi/linux/bpf.h | 4 +++ > kernel/bpf/helpers.c | 3 +++ > kernel/bpf/task_iter.c | 48 ++++++++++++++++++++++++++++++++++ > kernel/bpf/verifier.c | 23 ++++++++++++++++ > tools/include/uapi/linux/bpf.h | 4 +++ > tools/lib/bpf/bpf_helpers.h | 7 +++++ > 6 files changed, 89 insertions(+) > > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h > index 73b155e52204..de02c0971428 100644 > --- a/include/uapi/linux/bpf.h > +++ b/include/uapi/linux/bpf.h > @@ -7318,4 +7318,8 @@ struct bpf_iter_num { > __u64 __opaque[1]; > } __attribute__((aligned(8))); > > +struct bpf_iter_css_task { > + __u64 __opaque[1]; > +} __attribute__((aligned(8))); > + > #endif /* _UAPI__LINUX_BPF_H__ */ > diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c > index b0a9834f1051..d6a16becfbb9 100644 > --- a/kernel/bpf/helpers.c > +++ b/kernel/bpf/helpers.c > @@ -2504,6 +2504,9 @@ BTF_ID_FLAGS(func, bpf_dynptr_slice_rdwr, KF_RET_NULL) > BTF_ID_FLAGS(func, bpf_iter_num_new, KF_ITER_NEW) > BTF_ID_FLAGS(func, bpf_iter_num_next, KF_ITER_NEXT | KF_RET_NULL) > BTF_ID_FLAGS(func, bpf_iter_num_destroy, KF_ITER_DESTROY) > +BTF_ID_FLAGS(func, bpf_iter_css_task_new, KF_ITER_NEW) Looking at selftest: struct cgroup_subsys_state *css = &cgrp->self; realized that we're missing KF_TRUSTED_ARGS here. > +BTF_ID_FLAGS(func, bpf_iter_css_task_next, KF_ITER_NEXT | KF_RET_NULL) > +BTF_ID_FLAGS(func, bpf_iter_css_task_destroy, KF_ITER_DESTROY) > BTF_ID_FLAGS(func, bpf_dynptr_adjust) > BTF_ID_FLAGS(func, bpf_dynptr_is_null) > BTF_ID_FLAGS(func, bpf_dynptr_is_rdonly) > diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c > index 7473068ed313..d8539cc05ffd 100644 > --- a/kernel/bpf/task_iter.c > +++ b/kernel/bpf/task_iter.c > @@ -803,6 +803,54 @@ const struct bpf_func_proto bpf_find_vma_proto = { > .arg5_type = ARG_ANYTHING, > }; > > +struct bpf_iter_css_task_kern { > + struct css_task_iter *css_it; > +} __attribute__((aligned(8))); > + > +__bpf_kfunc int bpf_iter_css_task_new(struct bpf_iter_css_task *it, > + struct cgroup_subsys_state *css, unsigned int flags) the verifier does a type check, but it's not strong enough. We need KF_TRUSTED_ARGS to make sure the pointer is valid. The BTF_TYPE_SAFE_RCU(struct cgroup) { probably doesn't need to change, since '&cgrp->self' is not a pointer deref. The verifier should understand that cgroup_subsys_state is also PTR_TRUSTED just like 'cgrp' pointer. Also please add negative tests in patch 6. Like doing bpf_rcu_read_unlock() in the middle and check that the verifier catches such mistake.
On 9/12/23 12:01 AM, Chuyi Zhou wrote: > +__bpf_kfunc int bpf_iter_css_task_new(struct bpf_iter_css_task *it, > + struct cgroup_subsys_state *css, unsigned int flags) > +{ > + struct bpf_iter_css_task_kern *kit = (void *)it; > + > + BUILD_BUG_ON(sizeof(struct bpf_iter_css_task_kern) != sizeof(struct bpf_iter_css_task)); > + BUILD_BUG_ON(__alignof__(struct bpf_iter_css_task_kern) != > + __alignof__(struct bpf_iter_css_task)); > + > + switch (flags) { > + case CSS_TASK_ITER_PROCS | CSS_TASK_ITER_THREADED: > + case CSS_TASK_ITER_PROCS: > + case 0: > + break; > + default: > + return -EINVAL; > + } > + > + kit->css_it = kzalloc(sizeof(struct css_task_iter), GFP_KERNEL); > + if (!kit->css_it) > + return -ENOMEM; > + css_task_iter_start(css, flags, kit->css_it); > + return 0; > +} > + > +static bool check_css_task_iter_allowlist(struct bpf_verifier_env *env) > +{ > + enum bpf_prog_type prog_type = resolve_prog_type(env->prog); > + > + switch (prog_type) { > + case BPF_PROG_TYPE_LSM: This will allow the non-sleepable lsm prog to call bpf_iter_css_task_new. The above kzalloc(GFP_KERNEL) in bpf_iter_css_task_new should trigger a lockdep error in the lsm selftest in patch 6. > + return true; > + case BPF_TRACE_ITER: > + return env->prog->aux->sleepable; > + default: > + return false; > + } > +}
Hello Martin. 在 2023/9/13 03:57, Martin KaFai Lau 写道: > On 9/12/23 12:01 AM, Chuyi Zhou wrote: >> +__bpf_kfunc int bpf_iter_css_task_new(struct bpf_iter_css_task *it, >> + struct cgroup_subsys_state *css, unsigned int flags) >> +{ >> + struct bpf_iter_css_task_kern *kit = (void *)it; >> + >> + BUILD_BUG_ON(sizeof(struct bpf_iter_css_task_kern) != >> sizeof(struct bpf_iter_css_task)); >> + BUILD_BUG_ON(__alignof__(struct bpf_iter_css_task_kern) != >> + __alignof__(struct bpf_iter_css_task)); >> + >> + switch (flags) { >> + case CSS_TASK_ITER_PROCS | CSS_TASK_ITER_THREADED: >> + case CSS_TASK_ITER_PROCS: >> + case 0: >> + break; >> + default: >> + return -EINVAL; >> + } >> + >> + kit->css_it = kzalloc(sizeof(struct css_task_iter), GFP_KERNEL); >> + if (!kit->css_it) >> + return -ENOMEM; >> + css_task_iter_start(css, flags, kit->css_it); >> + return 0; >> +} >> + > >> +static bool check_css_task_iter_allowlist(struct bpf_verifier_env *env) >> +{ >> + enum bpf_prog_type prog_type = resolve_prog_type(env->prog); >> + >> + switch (prog_type) { >> + case BPF_PROG_TYPE_LSM: > > This will allow the non-sleepable lsm prog to call > bpf_iter_css_task_new. The above kzalloc(GFP_KERNEL) in > bpf_iter_css_task_new should trigger a lockdep error in the lsm selftest > in patch 6. Thanks for your test. I missed the dmesg error since I did not set CONFIG_LOCKDEP. I think here we can use kzalloc(GFP_ATOMIC) to eliminate this error if we want to use this iter in non-sleepable lsm prog. I just tested, it works well. > >> + return true; >> + case BPF_TRACE_ITER: >> + return env->prog->aux->sleepable; >> + default: >> + return false; >> + } >> +} >
Hello, Alexei. 在 2023/9/13 01:13, Alexei Starovoitov 写道: > On Tue, Sep 12, 2023 at 03:01:45PM +0800, Chuyi Zhou wrote: >> This patch adds kfuncs bpf_iter_css_task_{new,next,destroy} which allow >> creation and manipulation of struct bpf_iter_css_task in open-coded >> iterator style. These kfuncs actually wrapps css_task_iter_{start,next, >> end}. BPF programs can use these kfuncs through bpf_for_each macro for >> iteration of all tasks under a css. >> >> css_task_iter_*() would try to get the global spin-lock *css_set_lock*, so >> the bpf side has to be careful in where it allows to use this iter. >> Currently we only allow it in bpf_lsm and bpf iter-s. >> >> Signed-off-by: Chuyi Zhou <zhouchuyi@bytedance.com> >> --- >> include/uapi/linux/bpf.h | 4 +++ >> kernel/bpf/helpers.c | 3 +++ >> kernel/bpf/task_iter.c | 48 ++++++++++++++++++++++++++++++++++ >> kernel/bpf/verifier.c | 23 ++++++++++++++++ >> tools/include/uapi/linux/bpf.h | 4 +++ >> tools/lib/bpf/bpf_helpers.h | 7 +++++ >> 6 files changed, 89 insertions(+) >> >> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h >> index 73b155e52204..de02c0971428 100644 >> --- a/include/uapi/linux/bpf.h >> +++ b/include/uapi/linux/bpf.h >> @@ -7318,4 +7318,8 @@ struct bpf_iter_num { >> __u64 __opaque[1]; >> } __attribute__((aligned(8))); >> >> +struct bpf_iter_css_task { >> + __u64 __opaque[1]; >> +} __attribute__((aligned(8))); >> + >> #endif /* _UAPI__LINUX_BPF_H__ */ >> diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c >> index b0a9834f1051..d6a16becfbb9 100644 >> --- a/kernel/bpf/helpers.c >> +++ b/kernel/bpf/helpers.c >> @@ -2504,6 +2504,9 @@ BTF_ID_FLAGS(func, bpf_dynptr_slice_rdwr, KF_RET_NULL) >> BTF_ID_FLAGS(func, bpf_iter_num_new, KF_ITER_NEW) >> BTF_ID_FLAGS(func, bpf_iter_num_next, KF_ITER_NEXT | KF_RET_NULL) >> BTF_ID_FLAGS(func, bpf_iter_num_destroy, KF_ITER_DESTROY) >> +BTF_ID_FLAGS(func, bpf_iter_css_task_new, KF_ITER_NEW) > > Looking at selftest: > struct cgroup_subsys_state *css = &cgrp->self; > > realized that we're missing KF_TRUSTED_ARGS here. > >> +BTF_ID_FLAGS(func, bpf_iter_css_task_next, KF_ITER_NEXT | KF_RET_NULL) >> +BTF_ID_FLAGS(func, bpf_iter_css_task_destroy, KF_ITER_DESTROY) >> BTF_ID_FLAGS(func, bpf_dynptr_adjust) >> BTF_ID_FLAGS(func, bpf_dynptr_is_null) >> BTF_ID_FLAGS(func, bpf_dynptr_is_rdonly) >> diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c >> index 7473068ed313..d8539cc05ffd 100644 >> --- a/kernel/bpf/task_iter.c >> +++ b/kernel/bpf/task_iter.c >> @@ -803,6 +803,54 @@ const struct bpf_func_proto bpf_find_vma_proto = { >> .arg5_type = ARG_ANYTHING, >> }; >> >> +struct bpf_iter_css_task_kern { >> + struct css_task_iter *css_it; >> +} __attribute__((aligned(8))); >> + >> +__bpf_kfunc int bpf_iter_css_task_new(struct bpf_iter_css_task *it, >> + struct cgroup_subsys_state *css, unsigned int flags) > > the verifier does a type check, but it's not strong enough. > We need KF_TRUSTED_ARGS to make sure the pointer is valid. > The BTF_TYPE_SAFE_RCU(struct cgroup) { > probably doesn't need to change, since '&cgrp->self' is not a pointer deref. > The verifier should understand that cgroup_subsys_state is also PTR_TRUSTED > just like 'cgrp' pointer. Got it. It seems we should also apply this to bpf_iter_css_{pre,post}_new. > > Also please add negative tests in patch 6. > Like doing bpf_rcu_read_unlock() in the middle and check that the verifier > catches such mistake. I will do it in next version. Thanks.
On Tue, Sep 12, 2023 at 12:02 AM Chuyi Zhou <zhouchuyi@bytedance.com> wrote: > > This patch adds kfuncs bpf_iter_css_task_{new,next,destroy} which allow > creation and manipulation of struct bpf_iter_css_task in open-coded > iterator style. These kfuncs actually wrapps css_task_iter_{start,next, > end}. BPF programs can use these kfuncs through bpf_for_each macro for > iteration of all tasks under a css. > > css_task_iter_*() would try to get the global spin-lock *css_set_lock*, so > the bpf side has to be careful in where it allows to use this iter. > Currently we only allow it in bpf_lsm and bpf iter-s. > > Signed-off-by: Chuyi Zhou <zhouchuyi@bytedance.com> > --- > include/uapi/linux/bpf.h | 4 +++ > kernel/bpf/helpers.c | 3 +++ > kernel/bpf/task_iter.c | 48 ++++++++++++++++++++++++++++++++++ > kernel/bpf/verifier.c | 23 ++++++++++++++++ > tools/include/uapi/linux/bpf.h | 4 +++ > tools/lib/bpf/bpf_helpers.h | 7 +++++ > 6 files changed, 89 insertions(+) > > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h > index 73b155e52204..de02c0971428 100644 > --- a/include/uapi/linux/bpf.h > +++ b/include/uapi/linux/bpf.h > @@ -7318,4 +7318,8 @@ struct bpf_iter_num { > __u64 __opaque[1]; > } __attribute__((aligned(8))); > > +struct bpf_iter_css_task { > + __u64 __opaque[1]; > +} __attribute__((aligned(8))); > + > #endif /* _UAPI__LINUX_BPF_H__ */ > diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c > index b0a9834f1051..d6a16becfbb9 100644 > --- a/kernel/bpf/helpers.c > +++ b/kernel/bpf/helpers.c > @@ -2504,6 +2504,9 @@ BTF_ID_FLAGS(func, bpf_dynptr_slice_rdwr, KF_RET_NULL) > BTF_ID_FLAGS(func, bpf_iter_num_new, KF_ITER_NEW) > BTF_ID_FLAGS(func, bpf_iter_num_next, KF_ITER_NEXT | KF_RET_NULL) > BTF_ID_FLAGS(func, bpf_iter_num_destroy, KF_ITER_DESTROY) > +BTF_ID_FLAGS(func, bpf_iter_css_task_new, KF_ITER_NEW) > +BTF_ID_FLAGS(func, bpf_iter_css_task_next, KF_ITER_NEXT | KF_RET_NULL) > +BTF_ID_FLAGS(func, bpf_iter_css_task_destroy, KF_ITER_DESTROY) > BTF_ID_FLAGS(func, bpf_dynptr_adjust) > BTF_ID_FLAGS(func, bpf_dynptr_is_null) > BTF_ID_FLAGS(func, bpf_dynptr_is_rdonly) > diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c > index 7473068ed313..d8539cc05ffd 100644 > --- a/kernel/bpf/task_iter.c > +++ b/kernel/bpf/task_iter.c > @@ -803,6 +803,54 @@ const struct bpf_func_proto bpf_find_vma_proto = { > .arg5_type = ARG_ANYTHING, > }; > > +struct bpf_iter_css_task_kern { > + struct css_task_iter *css_it; > +} __attribute__((aligned(8))); > + > +__bpf_kfunc int bpf_iter_css_task_new(struct bpf_iter_css_task *it, > + struct cgroup_subsys_state *css, unsigned int flags) > +{ > + struct bpf_iter_css_task_kern *kit = (void *)it; > + > + BUILD_BUG_ON(sizeof(struct bpf_iter_css_task_kern) != sizeof(struct bpf_iter_css_task)); > + BUILD_BUG_ON(__alignof__(struct bpf_iter_css_task_kern) != > + __alignof__(struct bpf_iter_css_task)); > + > + switch (flags) { > + case CSS_TASK_ITER_PROCS | CSS_TASK_ITER_THREADED: > + case CSS_TASK_ITER_PROCS: > + case 0: > + break; > + default: > + return -EINVAL; > + } > + > + kit->css_it = kzalloc(sizeof(struct css_task_iter), GFP_KERNEL); Dave used bpf_mem_alloc() inside his iterator, any reason to not use it here? > + if (!kit->css_it) > + return -ENOMEM; > + css_task_iter_start(css, flags, kit->css_it); > + return 0; > +} > + > +__bpf_kfunc struct task_struct *bpf_iter_css_task_next(struct bpf_iter_css_task *it) > +{ > + struct bpf_iter_css_task_kern *kit = (void *)it; > + > + if (!kit->css_it) > + return NULL; > + return css_task_iter_next(kit->css_it); > +} > + > +__bpf_kfunc void bpf_iter_css_task_destroy(struct bpf_iter_css_task *it) > +{ > + struct bpf_iter_css_task_kern *kit = (void *)it; > + > + if (!kit->css_it) > + return; > + css_task_iter_end(kit->css_it); > + kfree(kit->css_it); > +} > + > DEFINE_PER_CPU(struct mmap_unlock_irq_work, mmap_unlock_work); > > static void do_mmap_read_unlock(struct irq_work *entry) > diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c > index dbba2b806017..2367483bf4c2 100644 > --- a/kernel/bpf/verifier.c > +++ b/kernel/bpf/verifier.c > @@ -10332,6 +10332,7 @@ enum special_kfunc_type { > KF_bpf_dynptr_clone, > KF_bpf_percpu_obj_new_impl, > KF_bpf_percpu_obj_drop_impl, > + KF_bpf_iter_css_task_new, > }; > > BTF_SET_START(special_kfunc_set) > @@ -10354,6 +10355,7 @@ BTF_ID(func, bpf_dynptr_slice_rdwr) > BTF_ID(func, bpf_dynptr_clone) > BTF_ID(func, bpf_percpu_obj_new_impl) > BTF_ID(func, bpf_percpu_obj_drop_impl) > +BTF_ID(func, bpf_iter_css_task_new) > BTF_SET_END(special_kfunc_set) > > BTF_ID_LIST(special_kfunc_list) > @@ -10378,6 +10380,7 @@ BTF_ID(func, bpf_dynptr_slice_rdwr) > BTF_ID(func, bpf_dynptr_clone) > BTF_ID(func, bpf_percpu_obj_new_impl) > BTF_ID(func, bpf_percpu_obj_drop_impl) > +BTF_ID(func, bpf_iter_css_task_new) > > static bool is_kfunc_ret_null(struct bpf_kfunc_call_arg_meta *meta) > { > @@ -10902,6 +10905,20 @@ static int process_kf_arg_ptr_to_rbtree_node(struct bpf_verifier_env *env, > &meta->arg_rbtree_root.field); > } > > +static bool check_css_task_iter_allowlist(struct bpf_verifier_env *env) > +{ > + enum bpf_prog_type prog_type = resolve_prog_type(env->prog); > + > + switch (prog_type) { > + case BPF_PROG_TYPE_LSM: > + return true; > + case BPF_TRACE_ITER: > + return env->prog->aux->sleepable; > + default: > + return false; > + } > +} > + > static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_arg_meta *meta, > int insn_idx) > { > @@ -11152,6 +11169,12 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ > break; > } > case KF_ARG_PTR_TO_ITER: > + if (meta->func_id == special_kfunc_list[KF_bpf_iter_css_task_new]) { > + if (!check_css_task_iter_allowlist(env)) { > + verbose(env, "css_task_iter is only allowed in bpf_lsm and bpf iter-s\n"); > + return -EINVAL; > + } > + } > ret = process_iter_arg(env, regno, insn_idx, meta); > if (ret < 0) > return ret; > diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h > index 73b155e52204..de02c0971428 100644 > --- a/tools/include/uapi/linux/bpf.h > +++ b/tools/include/uapi/linux/bpf.h > @@ -7318,4 +7318,8 @@ struct bpf_iter_num { > __u64 __opaque[1]; > } __attribute__((aligned(8))); > > +struct bpf_iter_css_task { > + __u64 __opaque[1]; > +} __attribute__((aligned(8))); > + > #endif /* _UAPI__LINUX_BPF_H__ */ > diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h > index 77ceea575dc7..f48723c6c593 100644 > --- a/tools/lib/bpf/bpf_helpers.h > +++ b/tools/lib/bpf/bpf_helpers.h > @@ -303,6 +303,13 @@ extern int bpf_iter_num_new(struct bpf_iter_num *it, int start, int end) __weak > extern int *bpf_iter_num_next(struct bpf_iter_num *it) __weak __ksym; > extern void bpf_iter_num_destroy(struct bpf_iter_num *it) __weak __ksym; > > +struct bpf_iter_css_task; > +struct cgroup_subsys_state; > +extern int bpf_iter_css_task_new(struct bpf_iter_css_task *it, > + struct cgroup_subsys_state *css, unsigned int flags) __weak __ksym; > +extern struct task_struct *bpf_iter_css_task_next(struct bpf_iter_css_task *it) __weak __ksym; > +extern void bpf_iter_css_task_destroy(struct bpf_iter_css_task *it) __weak __ksym; please move this into bpf_experimental.h under selftests, this shouldn't be in libbpf's stable API headers > + > #ifndef bpf_for_each > /* bpf_for_each(iter_type, cur_elem, args...) provides generic construct for > * using BPF open-coded iterators without having to write mundane explicit > -- > 2.20.1 >
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 73b155e52204..de02c0971428 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -7318,4 +7318,8 @@ struct bpf_iter_num { __u64 __opaque[1]; } __attribute__((aligned(8))); +struct bpf_iter_css_task { + __u64 __opaque[1]; +} __attribute__((aligned(8))); + #endif /* _UAPI__LINUX_BPF_H__ */ diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index b0a9834f1051..d6a16becfbb9 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -2504,6 +2504,9 @@ BTF_ID_FLAGS(func, bpf_dynptr_slice_rdwr, KF_RET_NULL) BTF_ID_FLAGS(func, bpf_iter_num_new, KF_ITER_NEW) BTF_ID_FLAGS(func, bpf_iter_num_next, KF_ITER_NEXT | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_iter_num_destroy, KF_ITER_DESTROY) +BTF_ID_FLAGS(func, bpf_iter_css_task_new, KF_ITER_NEW) +BTF_ID_FLAGS(func, bpf_iter_css_task_next, KF_ITER_NEXT | KF_RET_NULL) +BTF_ID_FLAGS(func, bpf_iter_css_task_destroy, KF_ITER_DESTROY) BTF_ID_FLAGS(func, bpf_dynptr_adjust) BTF_ID_FLAGS(func, bpf_dynptr_is_null) BTF_ID_FLAGS(func, bpf_dynptr_is_rdonly) diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c index 7473068ed313..d8539cc05ffd 100644 --- a/kernel/bpf/task_iter.c +++ b/kernel/bpf/task_iter.c @@ -803,6 +803,54 @@ const struct bpf_func_proto bpf_find_vma_proto = { .arg5_type = ARG_ANYTHING, }; +struct bpf_iter_css_task_kern { + struct css_task_iter *css_it; +} __attribute__((aligned(8))); + +__bpf_kfunc int bpf_iter_css_task_new(struct bpf_iter_css_task *it, + struct cgroup_subsys_state *css, unsigned int flags) +{ + struct bpf_iter_css_task_kern *kit = (void *)it; + + BUILD_BUG_ON(sizeof(struct bpf_iter_css_task_kern) != sizeof(struct bpf_iter_css_task)); + BUILD_BUG_ON(__alignof__(struct bpf_iter_css_task_kern) != + __alignof__(struct bpf_iter_css_task)); + + switch (flags) { + case CSS_TASK_ITER_PROCS | CSS_TASK_ITER_THREADED: + case CSS_TASK_ITER_PROCS: + case 0: + break; + default: + return -EINVAL; + } + + kit->css_it = kzalloc(sizeof(struct css_task_iter), GFP_KERNEL); + if (!kit->css_it) + return -ENOMEM; + css_task_iter_start(css, flags, kit->css_it); + return 0; +} + +__bpf_kfunc struct task_struct *bpf_iter_css_task_next(struct bpf_iter_css_task *it) +{ + struct bpf_iter_css_task_kern *kit = (void *)it; + + if (!kit->css_it) + return NULL; + return css_task_iter_next(kit->css_it); +} + +__bpf_kfunc void bpf_iter_css_task_destroy(struct bpf_iter_css_task *it) +{ + struct bpf_iter_css_task_kern *kit = (void *)it; + + if (!kit->css_it) + return; + css_task_iter_end(kit->css_it); + kfree(kit->css_it); +} + DEFINE_PER_CPU(struct mmap_unlock_irq_work, mmap_unlock_work); static void do_mmap_read_unlock(struct irq_work *entry) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index dbba2b806017..2367483bf4c2 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -10332,6 +10332,7 @@ enum special_kfunc_type { KF_bpf_dynptr_clone, KF_bpf_percpu_obj_new_impl, KF_bpf_percpu_obj_drop_impl, + KF_bpf_iter_css_task_new, }; BTF_SET_START(special_kfunc_set) @@ -10354,6 +10355,7 @@ BTF_ID(func, bpf_dynptr_slice_rdwr) BTF_ID(func, bpf_dynptr_clone) BTF_ID(func, bpf_percpu_obj_new_impl) BTF_ID(func, bpf_percpu_obj_drop_impl) +BTF_ID(func, bpf_iter_css_task_new) BTF_SET_END(special_kfunc_set) BTF_ID_LIST(special_kfunc_list) @@ -10378,6 +10380,7 @@ BTF_ID(func, bpf_dynptr_slice_rdwr) BTF_ID(func, bpf_dynptr_clone) BTF_ID(func, bpf_percpu_obj_new_impl) BTF_ID(func, bpf_percpu_obj_drop_impl) +BTF_ID(func, bpf_iter_css_task_new) static bool is_kfunc_ret_null(struct bpf_kfunc_call_arg_meta *meta) { @@ -10902,6 +10905,20 @@ static int process_kf_arg_ptr_to_rbtree_node(struct bpf_verifier_env *env, &meta->arg_rbtree_root.field); } +static bool check_css_task_iter_allowlist(struct bpf_verifier_env *env) +{ + enum bpf_prog_type prog_type = resolve_prog_type(env->prog); + + switch (prog_type) { + case BPF_PROG_TYPE_LSM: + return true; + case BPF_TRACE_ITER: + return env->prog->aux->sleepable; + default: + return false; + } +} + static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_arg_meta *meta, int insn_idx) { @@ -11152,6 +11169,12 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ break; } case KF_ARG_PTR_TO_ITER: + if (meta->func_id == special_kfunc_list[KF_bpf_iter_css_task_new]) { + if (!check_css_task_iter_allowlist(env)) { + verbose(env, "css_task_iter is only allowed in bpf_lsm and bpf iter-s\n"); + return -EINVAL; + } + } ret = process_iter_arg(env, regno, insn_idx, meta); if (ret < 0) return ret; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 73b155e52204..de02c0971428 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -7318,4 +7318,8 @@ struct bpf_iter_num { __u64 __opaque[1]; } __attribute__((aligned(8))); +struct bpf_iter_css_task { + __u64 __opaque[1]; +} __attribute__((aligned(8))); + #endif /* _UAPI__LINUX_BPF_H__ */ diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h index 77ceea575dc7..f48723c6c593 100644 --- a/tools/lib/bpf/bpf_helpers.h +++ b/tools/lib/bpf/bpf_helpers.h @@ -303,6 +303,13 @@ extern int bpf_iter_num_new(struct bpf_iter_num *it, int start, int end) __weak extern int *bpf_iter_num_next(struct bpf_iter_num *it) __weak __ksym; extern void bpf_iter_num_destroy(struct bpf_iter_num *it) __weak __ksym; +struct bpf_iter_css_task; +struct cgroup_subsys_state; +extern int bpf_iter_css_task_new(struct bpf_iter_css_task *it, + struct cgroup_subsys_state *css, unsigned int flags) __weak __ksym; +extern struct task_struct *bpf_iter_css_task_next(struct bpf_iter_css_task *it) __weak __ksym; +extern void bpf_iter_css_task_destroy(struct bpf_iter_css_task *it) __weak __ksym; + #ifndef bpf_for_each /* bpf_for_each(iter_type, cur_elem, args...) provides generic construct for * using BPF open-coded iterators without having to write mundane explicit
This patch adds kfuncs bpf_iter_css_task_{new,next,destroy} which allow creation and manipulation of struct bpf_iter_css_task in open-coded iterator style. These kfuncs actually wrapps css_task_iter_{start,next, end}. BPF programs can use these kfuncs through bpf_for_each macro for iteration of all tasks under a css. css_task_iter_*() would try to get the global spin-lock *css_set_lock*, so the bpf side has to be careful in where it allows to use this iter. Currently we only allow it in bpf_lsm and bpf iter-s. Signed-off-by: Chuyi Zhou <zhouchuyi@bytedance.com> --- include/uapi/linux/bpf.h | 4 +++ kernel/bpf/helpers.c | 3 +++ kernel/bpf/task_iter.c | 48 ++++++++++++++++++++++++++++++++++ kernel/bpf/verifier.c | 23 ++++++++++++++++ tools/include/uapi/linux/bpf.h | 4 +++ tools/lib/bpf/bpf_helpers.h | 7 +++++ 6 files changed, 89 insertions(+)