Message ID | 20230827072057.1591929-3-zhouchuyi@bytedance.com
---|---
State | RFC
Delegated to | BPF
Series | Add Open-coded process and css iters
On Sun, Aug 27, 2023 at 12:21 AM Chuyi Zhou <zhouchuyi@bytedance.com> wrote:
>
> This patch adds kfuncs bpf_iter_process_{new,next,destroy} which allow
> creation and manipulation of struct bpf_iter_process in open-coded
> iterator style. BPF programs can use these kfuncs directly or through
> the bpf_for_each macro to iterate over all processes in the system.
>
> Signed-off-by: Chuyi Zhou <zhouchuyi@bytedance.com>
> ---
[...cut...]
> diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c
> index b1bdba40b684..a6717a76c1e0 100644
> --- a/kernel/bpf/task_iter.c
> +++ b/kernel/bpf/task_iter.c
> @@ -862,6 +862,37 @@ __bpf_kfunc void bpf_iter_css_task_destroy(struct bpf_iter_css_task *it)
>  	kfree(kit->css_it);
>  }
>
> +struct bpf_iter_process_kern {
> +	struct task_struct *tsk;
> +} __attribute__((aligned(8)));
> +
> +__bpf_kfunc int bpf_iter_process_new(struct bpf_iter_process *it)
> +{
> +	struct bpf_iter_process_kern *kit = (void *)it;
> +
> +	BUILD_BUG_ON(sizeof(struct bpf_iter_process_kern) != sizeof(struct bpf_iter_process));
> +	BUILD_BUG_ON(__alignof__(struct bpf_iter_process_kern) !=
> +					__alignof__(struct bpf_iter_process));
> +
> +	rcu_read_lock();
> +	kit->tsk = &init_task;
> +	return 0;
> +}
> +
> +__bpf_kfunc struct task_struct *bpf_iter_process_next(struct bpf_iter_process *it)
> +{
> +	struct bpf_iter_process_kern *kit = (void *)it;
> +
> +	kit->tsk = next_task(kit->tsk);
> +
> +	return kit->tsk == &init_task ? NULL : kit->tsk;
> +}
> +
> +__bpf_kfunc void bpf_iter_process_destroy(struct bpf_iter_process *it)
> +{
> +	rcu_read_unlock();
> +}

This iter can be used in all ctx-s which is nice, but let's
make the verifier enforce rcu_read_lock/unlock done by the bpf prog
instead of doing it in the ctor/dtor of the iter, since
in sleepable progs the verifier won't recognize that the body is an RCU CS.
We'd need to teach the verifier to allow bpf_iter_process_new()
inside in_rcu_cs() and make sure there is no rcu_read_unlock
while BPF_ITER_STATE_ACTIVE.
bpf_iter_process_destroy() would become a nop.
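To make the proposed contract concrete, here is a sketch of what a sleepable program would look like once the verifier enforces an explicit RCU section around the iterator. It assumes the bpf_for_each() wrapper added by this series; the attach point and the printed fields are arbitrary picks for illustration, not part of the patch:

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

/* existing kfuncs for opening an explicit RCU read section
 * from a sleepable program */
extern void bpf_rcu_read_lock(void) __ksym;
extern void bpf_rcu_read_unlock(void) __ksym;

char LICENSE[] SEC("license") = "GPL";

SEC("fentry.s/security_file_open")	/* sleepable fentry; arbitrary target */
int iterate_tasks_sleepable(void *ctx)
{
	struct task_struct *p;

	/* a sleepable prog has no implicit RCU section, so under this
	 * proposal the verifier would reject bpf_iter_process_new()
	 * without the explicit lock below */
	bpf_rcu_read_lock();
	bpf_for_each(process, p) {
		if (p->pid == 1)
			bpf_printk("init comm: %s", p->comm);
	}
	bpf_rcu_read_unlock();
	return 0;
}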
Hello, Alexei.

On 2023/9/6 04:09, Alexei Starovoitov wrote:
> On Sun, Aug 27, 2023 at 12:21 AM Chuyi Zhou <zhouchuyi@bytedance.com> wrote:
[...cut...]
> This iter can be used in all ctx-s which is nice, but let's
> make the verifier enforce rcu_read_lock/unlock done by the bpf prog
> instead of doing it in the ctor/dtor of the iter, since
> in sleepable progs the verifier won't recognize that the body is an RCU CS.
> We'd need to teach the verifier to allow bpf_iter_process_new()
> inside in_rcu_cs() and make sure there is no rcu_read_unlock
> while BPF_ITER_STATE_ACTIVE.
> bpf_iter_process_destroy() would become a nop.

Thanks for your review!

I think bpf_iter_process_{new, next, destroy} should be protected by
bpf_rcu_read_lock/unlock explicitly whether the prog is sleepable or
not, right?

I'm not very familiar with the BPF verifier, but I believe there is
still a risk in directly calling these kfuncs even if in_rcu_cs() is
true.

Maybe what we actually need here is to enforce the BPF verifier to
check that env->cur_state->active_rcu_lock is true when we want to
call these kfuncs.

Thanks.
On Wed, Sep 6, 2023 at 5:38 AM Chuyi Zhou <zhouchuyi@bytedance.com> wrote:
>
> Hello, Alexei.
>
> On 2023/9/6 04:09, Alexei Starovoitov wrote:
[...cut...]
> I think bpf_iter_process_{new, next, destroy} should be protected by
> bpf_rcu_read_lock/unlock explicitly whether the prog is sleepable or
> not, right?

Correct. By explicit bpf_rcu_read_lock() in case of sleepable progs,
or just by using them in normal bpf progs that have an implicit
rcu_read_lock() done before calling into them.

> I'm not very familiar with the BPF verifier, but I believe there is
> still a risk in directly calling these kfuncs even if in_rcu_cs() is
> true.
>
> Maybe what we actually need here is to enforce the BPF verifier to
> check that env->cur_state->active_rcu_lock is true when we want to
> call these kfuncs.

active_rcu_lock means an explicit bpf_rcu_read_lock.
Currently we do allow bpf_rcu_read_lock in non-sleepable, but it's
pointless.

Technically we can extend the check:

	if (in_rbtree_lock_required_cb(env) && (rcu_lock || rcu_unlock)) {
		verbose(env, "Calling bpf_rcu_read_{lock,unlock} in unnecessary rbtree callback\n");
		return -EACCES;
	}

to discourage their use in all non-sleepable progs, but it will break
some progs.

I think it's ok to check in_rcu_cs() to allow bpf_iter_process_*().
If a bpf prog adds an explicit and unnecessary bpf_rcu_read_lock()
around the iter ops it won't do any harm.
We just need to make sure that the rcu unlock logic:

	} else if (rcu_unlock) {
		bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({
			if (reg->type & MEM_RCU) {
				reg->type &= ~(MEM_RCU | PTR_MAYBE_NULL);
				reg->type |= PTR_UNTRUSTED;
			}
		}));

clears the iter state that depends on rcu.

I thought about changing mark_stack_slots_iter() to do

	st->type = PTR_TO_STACK | MEM_RCU;

so that the above clearing logic kicks in, but it might be better to
have something iter specific. is_iter_reg_valid_init() should probably
be changed to make sure reg->type is not UNTRUSTED.

Andrii,
do you have better suggestions?
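For reference, in_rcu_cs() mentioned throughout this thread looked roughly like this in kernel/bpf/verifier.c at the time (an approximate reproduction, not a verbatim quote): non-sleepable programs, explicit bpf_rcu_read_lock() sections, and bpf_spin_lock sections all count as RCU-protected.

/* approximate sketch of the helper being discussed */
static bool in_rcu_cs(struct bpf_verifier_env *env)
{
	return env->cur_state->active_rcu_lock ||
	       env->cur_state->active_lock.ptr ||
	       !env->prog->aux->sleepable;
}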
Hello,

On 2023/9/7 01:17, Alexei Starovoitov wrote:
[...cut...]
>> I think bpf_iter_process_{new, next, destroy} should be protected by
>> bpf_rcu_read_lock/unlock explicitly whether the prog is sleepable or
>> not, right?
>
> Correct. By explicit bpf_rcu_read_lock() in case of sleepable progs,
> or just by using them in normal bpf progs that have an implicit
> rcu_read_lock() done before calling into them.

Thanks for your explanation, I missed the latter.

[...cut...]
> I thought about changing mark_stack_slots_iter() to do
>
> 	st->type = PTR_TO_STACK | MEM_RCU;
>
> so that the above clearing logic kicks in, but it might be better to
> have something iter specific. is_iter_reg_valid_init() should probably
> be changed to make sure reg->type is not UNTRUSTED.

Maybe it's something like the following?

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index bb78212fa5b2..9185c4a40a21 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1172,7 +1172,15 @@ static bool is_dynptr_type_expected(struct bpf_verifier_env *env, struct bpf_reg
 
 static void __mark_reg_known_zero(struct bpf_reg_state *reg);
 
+static bool in_rcu_cs(struct bpf_verifier_env *env);
+
+/* check whether we are using bpf_iter_process_*() or bpf_iter_css_*() */
+static bool is_iter_need_rcu(struct bpf_kfunc_call_arg_meta *meta)
+{
+
+}
 static int mark_stack_slots_iter(struct bpf_verifier_env *env,
+				 struct bpf_kfunc_call_arg_meta *meta,
 				 struct bpf_reg_state *reg, int insn_idx,
 				 struct btf *btf, u32 btf_id, int nr_slots)
 {
@@ -1193,6 +1201,12 @@ static int mark_stack_slots_iter(struct bpf_verifier_env *env,
 
 		__mark_reg_known_zero(st);
 		st->type = PTR_TO_STACK; /* we don't have dedicated reg type */
+		if (is_iter_need_rcu(meta)) {
+			if (in_rcu_cs(env))
+				st->type |= MEM_RCU;
+			else
+				st->type |= PTR_UNTRUSTED;
+		}
 		st->live |= REG_LIVE_WRITTEN;
 		st->ref_obj_id = i == 0 ? id : 0;
 		st->iter.btf = btf;
@@ -1281,6 +1295,8 @@ static bool is_iter_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_
 		struct bpf_stack_state *slot = &state->stack[spi - i];
 		struct bpf_reg_state *st = &slot->spilled_ptr;
 
+		if (st->type & PTR_UNTRUSTED)
+			return false;
 		/* only main (first) slot has ref_obj_id set */
 		if (i == 0 && !st->ref_obj_id)
 			return false;

> Andrii,
> do you have better suggestions?
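For completeness, one plausible body for the empty is_iter_need_rcu() placeholder above, following the verifier's existing special_kfunc_list pattern; the KF_bpf_iter_process_new and KF_bpf_iter_css_new entries are hypothetical additions for this series, not existing enum values:

static bool is_iter_need_rcu(struct bpf_kfunc_call_arg_meta *meta)
{
	/* match the kfuncs whose iterators walk RCU-protected lists */
	return meta->func_id == special_kfunc_list[KF_bpf_iter_process_new] ||
	       meta->func_id == special_kfunc_list[KF_bpf_iter_css_new];
}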
On Thu, Sep 7, 2023 at 5:02 AM Chuyi Zhou <zhouchuyi@bytedance.com> wrote:
[...cut...]
> Maybe it's something like the following?
>
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
[...cut...]
> @@ -1281,6 +1295,8 @@ static bool is_iter_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_
>  		struct bpf_stack_state *slot = &state->stack[spi - i];
>  		struct bpf_reg_state *st = &slot->spilled_ptr;
>
> +		if (st->type & PTR_UNTRUSTED)
> +			return false;

Yep. All makes sense to me.
On Wed, Sep 6, 2023 at 10:18 AM Alexei Starovoitov
<alexei.starovoitov@gmail.com> wrote:
[...cut...]
> I thought about changing mark_stack_slots_iter() to do
>
> 	st->type = PTR_TO_STACK | MEM_RCU;
>
> so that the above clearing logic kicks in, but it might be better to
> have something iter specific. is_iter_reg_valid_init() should probably
> be changed to make sure reg->type is not UNTRUSTED.
>
> Andrii,
> do you have better suggestions?

What if we just remember inside bpf_reg_state.iter state whether
iterator needs to be RCU protected (it's just one bit if we don't
allow nesting rcu_read_lock()/rcu_read_unlock(), or we'd need to
remember RCU nestedness level), and then when validating iter_next and
iter_destroy() kfuncs, check that we are still in RCU-protected region
(if we have nestedness, then iter->rcu_nest_level <=
cur_rcu_nest_level, if I understand correctly). And if not, provide a
clear and nice message.

That seems straightforward enough, but am I missing anything subtle?
On Wed, 13 Sept 2023 at 00:12, Andrii Nakryiko
<andrii.nakryiko@gmail.com> wrote:
[...cut...]
> What if we just remember inside bpf_reg_state.iter state whether
> iterator needs to be RCU protected (it's just one bit if we don't
> allow nesting rcu_read_lock()/rcu_read_unlock(), or we'd need to
> remember RCU nestedness level), and then when validating iter_next and
> iter_destroy() kfuncs, check that we are still in RCU-protected region
> (if we have nestedness, then iter->rcu_nest_level <=
> cur_rcu_nest_level, if I understand correctly). And if not, provide a
> clear and nice message.
>
> That seems straightforward enough, but am I missing anything subtle?

We also need to ensure one does not do a bpf_rcu_read_unlock and
bpf_rcu_read_lock again between the iter_new and iter_next/iter_destroy
calls. Simply checking that we are in an RCU protected region will pass
the verifier in such a case.

A simple solution might be associating an ID with the RCU CS, i.e.
make active_rcu_lock a 32-bit ID which is monotonically increasing for
each new RCU region. Of course, all of this only matters for sleepable
programs. Then check that the id recorded in the iter state is the same
on next and destroy.
On Tue, Sep 12, 2023 at 3:21 PM Kumar Kartikeya Dwivedi
<memxor@gmail.com> wrote:
[...cut...]
> We also need to ensure one does not do a bpf_rcu_read_unlock and
> bpf_rcu_read_lock again between the iter_new and iter_next/iter_destroy
> calls. Simply checking that we are in an RCU protected region will pass
> the verifier in such a case.

Yep, you are right, what I proposed is too naive, of course.

> A simple solution might be associating an ID with the RCU CS, i.e.
> make active_rcu_lock a 32-bit ID which is monotonically increasing for
> each new RCU region. Of course, all of this only matters for sleepable
> programs. Then check that the id recorded in the iter state is the same
> on next and destroy.

Yep, I think each RCU region should ideally be tracked separately and
get a unique ID. Kind of like a ref. It is some lifetime/scope, not
necessarily an actual kernel object. And if/when we have it, we can
grab the ID of the most nested RCU scope, associate it with the
RCU-protected iter, and then make sure that this RCU scope is active
at every next/destroy invocation.
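A rough sketch of the ID scheme converged on above; every name here is hypothetical, and the fragments only illustrate where each piece of verifier state would live:

/* 1) bpf_verifier_env grows a monotonically increasing generator */
u32 rcu_cs_id_gen;

/* 2) bpf_rcu_read_lock() opens a new, uniquely numbered critical
 *    section (active_rcu_lock becomes an ID rather than a bool) */
env->cur_state->active_rcu_lock = ++env->rcu_cs_id_gen;

/* 3) iter_new() of an RCU-dependent iterator records the current CS */
st->iter.rcu_cs_id = env->cur_state->active_rcu_lock;

/* 4) iter_next()/iter_destroy() require that very CS to still be
 *    open; an unlock+relock in between yields a different ID */
if (st->iter.rcu_cs_id != env->cur_state->active_rcu_lock) {
	verbose(env, "iterator's RCU read section was exited\n");
	return -EINVAL;
}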
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 2a6e9b99564b..cfbd527e3733 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -7199,4 +7199,8 @@ struct bpf_iter_css_task {
 	__u64 __opaque[1];
 } __attribute__((aligned(8)));
 
+struct bpf_iter_process {
+	__u64 __opaque[1];
+} __attribute__((aligned(8)));
+
 #endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index cf113ad24837..81a2005edc26 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -2458,6 +2458,9 @@ BTF_ID_FLAGS(func, bpf_iter_num_destroy, KF_ITER_DESTROY)
 BTF_ID_FLAGS(func, bpf_iter_css_task_new, KF_ITER_NEW)
 BTF_ID_FLAGS(func, bpf_iter_css_task_next, KF_ITER_NEXT | KF_RET_NULL)
 BTF_ID_FLAGS(func, bpf_iter_css_task_destroy, KF_ITER_DESTROY)
+BTF_ID_FLAGS(func, bpf_iter_process_new, KF_ITER_NEW)
+BTF_ID_FLAGS(func, bpf_iter_process_next, KF_ITER_NEXT | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_iter_process_destroy, KF_ITER_DESTROY)
 BTF_ID_FLAGS(func, bpf_dynptr_adjust)
 BTF_ID_FLAGS(func, bpf_dynptr_is_null)
 BTF_ID_FLAGS(func, bpf_dynptr_is_rdonly)
diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c
index b1bdba40b684..a6717a76c1e0 100644
--- a/kernel/bpf/task_iter.c
+++ b/kernel/bpf/task_iter.c
@@ -862,6 +862,37 @@ __bpf_kfunc void bpf_iter_css_task_destroy(struct bpf_iter_css_task *it)
 	kfree(kit->css_it);
 }
 
+struct bpf_iter_process_kern {
+	struct task_struct *tsk;
+} __attribute__((aligned(8)));
+
+__bpf_kfunc int bpf_iter_process_new(struct bpf_iter_process *it)
+{
+	struct bpf_iter_process_kern *kit = (void *)it;
+
+	BUILD_BUG_ON(sizeof(struct bpf_iter_process_kern) != sizeof(struct bpf_iter_process));
+	BUILD_BUG_ON(__alignof__(struct bpf_iter_process_kern) !=
+					__alignof__(struct bpf_iter_process));
+
+	rcu_read_lock();
+	kit->tsk = &init_task;
+	return 0;
+}
+
+__bpf_kfunc struct task_struct *bpf_iter_process_next(struct bpf_iter_process *it)
+{
+	struct bpf_iter_process_kern *kit = (void *)it;
+
+	kit->tsk = next_task(kit->tsk);
+
+	return kit->tsk == &init_task ? NULL : kit->tsk;
+}
+
+__bpf_kfunc void bpf_iter_process_destroy(struct bpf_iter_process *it)
+{
+	rcu_read_unlock();
+}
+
 DEFINE_PER_CPU(struct mmap_unlock_irq_work, mmap_unlock_work);
 
 static void do_mmap_read_unlock(struct irq_work *entry)
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 2a6e9b99564b..cfbd527e3733 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -7199,4 +7199,8 @@ struct bpf_iter_css_task {
 	__u64 __opaque[1];
 } __attribute__((aligned(8)));
 
+struct bpf_iter_process {
+	__u64 __opaque[1];
+} __attribute__((aligned(8)));
+
 #endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h
index f4d74b2aaddd..7d6a828d98b5 100644
--- a/tools/lib/bpf/bpf_helpers.h
+++ b/tools/lib/bpf/bpf_helpers.h
@@ -309,6 +309,11 @@ extern int bpf_iter_css_task_new(struct bpf_iter_css_task *it,
 extern struct task_struct *bpf_iter_css_task_next(struct bpf_iter_css_task *it) __weak __ksym;
 extern void bpf_iter_css_task_destroy(struct bpf_iter_css_task *it) __weak __ksym;
 
+struct bpf_iter_process;
+extern int bpf_iter_process_new(struct bpf_iter_process *it) __weak __ksym;
+extern struct task_struct *bpf_iter_process_next(struct bpf_iter_process *it) __weak __ksym;
+extern void bpf_iter_process_destroy(struct bpf_iter_process *it) __weak __ksym;
+
 #ifndef bpf_for_each
 /* bpf_for_each(iter_type, cur_elem, args...) provides generic construct for
  * using BPF open-coded iterators without having to write mundane explicit
This patch adds kfuncs bpf_iter_process_{new,next,destroy} which allow
creation and manipulation of struct bpf_iter_process in open-coded
iterator style. BPF programs can use these kfuncs directly or through
the bpf_for_each macro to iterate over all processes in the system.

Signed-off-by: Chuyi Zhou <zhouchuyi@bytedance.com>
---
 include/uapi/linux/bpf.h       |  4 ++++
 kernel/bpf/helpers.c           |  3 +++
 kernel/bpf/task_iter.c         | 31 +++++++++++++++++++++++++++++++
 tools/include/uapi/linux/bpf.h |  4 ++++
 tools/lib/bpf/bpf_helpers.h    |  5 +++++
 5 files changed, 47 insertions(+)
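To show the implicit-RCU case that works with the patch as posted, here is a minimal sketch of a non-sleepable program using the bpf_for_each wrapper from this series; the tracepoint is an arbitrary choice for illustration:

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

char LICENSE[] SEC("license") = "GPL";

/* non-sleepable tracing prog: it runs under an implicit
 * rcu_read_lock(), so the iterated task pointers stay valid */
SEC("tp_btf/sched_switch")
int count_tasks(u64 *ctx)
{
	struct task_struct *p;
	int nr = 0;

	/* expands to bpf_iter_process_{new,next,destroy} */
	bpf_for_each(process, p)
		nr++;

	bpf_printk("%d processes in the system", nr);
	return 0;
}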