Message ID | 20220801205039.2755281-1-haoluo@google.com (mailing list archive) |
---|---|
State | Changes Requested |
Delegated to: | BPF |
Headers | show |
Series | [bpf-next,v1] bpf, iter: clean up bpf_seq_read(). | expand |
On Mon, Aug 01, 2022 at 01:50:39PM -0700, Hao Luo wrote: SNIP > +static int do_seq_show(struct seq_file *seq, void *p, size_t offs) > +{ > + int err; > + > + WARN_ON(IS_ERR_OR_NULL(p)); > + > + err = seq->op->show(seq, p); > + if (err > 0) { > + /* object is skipped, decrease seq_num, so next > + * valid object can reuse the same seq_num. > + */ > + bpf_iter_dec_seq_num(seq); > + seq->count = offs; > + return err; > + } > + > + if (err < 0 || seq_has_overflowed(seq)) { > + seq->count = offs; > + return err ? err : -E2BIG; > + } > + > + /* err == 0 and no overflow */ > + return 0; > +} > + > +/* do_seq_stop, stops at the given object 'p'. 'p' could be an ERR or NULL. If > + * 'p' is an ERR or there was an overflow, reset seq->count to 'offs' and > + * returns error. Returns 0 otherwise. > + */ > +static int do_seq_stop(struct seq_file *seq, void *p, size_t offs) > +{ > + if (IS_ERR(p)) { > + seq->op->stop(seq, NULL); > + seq->count = offs; should we set seq->count to 0 in case of error? jirka > + return PTR_ERR(p); > + } > + > + seq->op->stop(seq, p); > + if (!p) { > + if (!seq_has_overflowed(seq)) { > + bpf_iter_done_stop(seq); > + } else { > + seq->count = offs; > + if (offs == 0) > + return -E2BIG; > + } > + } > + return 0; > +} > + > /* maximum visited objects before bailing out */ > #define MAX_ITER_OBJECTS 1000000 > SNIP
On Tue, Aug 2, 2022 at 4:14 AM Jiri Olsa <olsajiri@gmail.com> wrote: > > On Mon, Aug 01, 2022 at 01:50:39PM -0700, Hao Luo wrote: > > SNIP > > > +static int do_seq_show(struct seq_file *seq, void *p, size_t offs) > > +{ > > + int err; > > + > > + WARN_ON(IS_ERR_OR_NULL(p)); > > + > > + err = seq->op->show(seq, p); > > + if (err > 0) { > > + /* object is skipped, decrease seq_num, so next > > + * valid object can reuse the same seq_num. > > + */ > > + bpf_iter_dec_seq_num(seq); > > + seq->count = offs; > > + return err; > > + } > > + > > + if (err < 0 || seq_has_overflowed(seq)) { > > + seq->count = offs; > > + return err ? err : -E2BIG; > > + } > > + > > + /* err == 0 and no overflow */ > > + return 0; > > +} > > + > > +/* do_seq_stop, stops at the given object 'p'. 'p' could be an ERR or NULL. If > > + * 'p' is an ERR or there was an overflow, reset seq->count to 'offs' and > > + * returns error. Returns 0 otherwise. > > + */ > > +static int do_seq_stop(struct seq_file *seq, void *p, size_t offs) > > +{ > > + if (IS_ERR(p)) { > > + seq->op->stop(seq, NULL); > > + seq->count = offs; > > should we set seq->count to 0 in case of error? > Thanks Jiri. To be honest, I don't know. There are two paths that may lead to an error "p". First, seq->op->start() could return ERR, in that case, '"offs'" is zero and we set it to zero already. This is fine. The other one, seq->op->next() could return ERR. This is a case where bpf_seq_read() fails to handle right now, so I am not sure what to do. Based on my understanding reading the code, if seq->count isn't zeroed, the current read() will not copy data, but the next read() will copy data (see the "if (seq->count)" at the beginning of bpf_seq_read). If seq->count is zeroed, the data in buffer will be discarded. I don't know what is right. > jirka > > > + return PTR_ERR(p); > > + } > > + > > + seq->op->stop(seq, p); > > + if (!p) { > > + if (!seq_has_overflowed(seq)) { > > + bpf_iter_done_stop(seq); > > + } else { > > + seq->count = offs; > > + if (offs == 0) > > + return -E2BIG; > > + } > > + } > > + return 0; > > +} > > + > > /* maximum visited objects before bailing out */ > > #define MAX_ITER_OBJECTS 1000000 > > > > SNIP
On Tue, Aug 02, 2022 at 05:15:50PM -0700, Hao Luo wrote: > On Tue, Aug 2, 2022 at 4:14 AM Jiri Olsa <olsajiri@gmail.com> wrote: > > > > On Mon, Aug 01, 2022 at 01:50:39PM -0700, Hao Luo wrote: > > > > SNIP > > > > > +static int do_seq_show(struct seq_file *seq, void *p, size_t offs) > > > +{ > > > + int err; > > > + > > > + WARN_ON(IS_ERR_OR_NULL(p)); > > > + > > > + err = seq->op->show(seq, p); > > > + if (err > 0) { > > > + /* object is skipped, decrease seq_num, so next > > > + * valid object can reuse the same seq_num. > > > + */ > > > + bpf_iter_dec_seq_num(seq); > > > + seq->count = offs; > > > + return err; > > > + } > > > + > > > + if (err < 0 || seq_has_overflowed(seq)) { > > > + seq->count = offs; > > > + return err ? err : -E2BIG; > > > + } > > > + > > > + /* err == 0 and no overflow */ > > > + return 0; > > > +} > > > + > > > +/* do_seq_stop, stops at the given object 'p'. 'p' could be an ERR or NULL. If > > > + * 'p' is an ERR or there was an overflow, reset seq->count to 'offs' and > > > + * returns error. Returns 0 otherwise. > > > + */ > > > +static int do_seq_stop(struct seq_file *seq, void *p, size_t offs) > > > +{ > > > + if (IS_ERR(p)) { > > > + seq->op->stop(seq, NULL); > > > + seq->count = offs; > > > > should we set seq->count to 0 in case of error? > > > > Thanks Jiri. To be honest, I don't know. There are two paths that may > lead to an error "p". > > First, seq->op->start() could return ERR, in that case, '"offs'" is > zero and we set it to zero already. This is fine. ah right, offs is zero at that time, looks good then > > The other one, seq->op->next() could return ERR. This is a case where > bpf_seq_read() fails to handle right now, so I am not sure what to do. but maybe we don't need to set seq->count in here, like: static int do_seq_stop(struct seq_file *seq, void *p, size_t offs) { if (IS_ERR(p)) { seq->op->stop(seq, NULL); return PTR_ERR(p); } because it's already set by error path of do_seq_show > > Based on my understanding reading the code, if seq->count isn't > zeroed, the current read() will not copy data, but the next read() > will copy data (see the "if (seq->count)" at the beginning of > bpf_seq_read). If seq->count is zeroed, the data in buffer will be > discarded. I don't know what is right. I think we should return the buffer up to the point there's an error, that's why we set seq->count to previous offs value after failed show callback jirka
On Mon, Aug 01, 2022 at 01:50:39PM -0700, Hao Luo wrote: SNIP > - err = seq->op->show(seq, p); > - if (err > 0) { > - /* object is skipped, decrease seq_num, so next > - * valid object can reuse the same seq_num. > - */ > - bpf_iter_dec_seq_num(seq); > - seq->count = 0; > - } else if (err < 0 || seq_has_overflowed(seq)) { > - if (!err) > - err = -E2BIG; > - seq->op->stop(seq, p); > + err = do_seq_show(seq, p, 0); > + if (err < 0) { > + do_seq_stop(seq, p, 0); > seq->count = 0; > goto done; > } > @@ -153,7 +208,7 @@ static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size, > num_objs++; > offs = seq->count; > p = seq->op->next(seq, p, &seq->index); > - if (pos == seq->index) { > + if (unlikely(pos == seq->index)) { > pr_info_ratelimited("buggy seq_file .next function %ps " > "did not updated position index\n", > seq->op->next); > @@ -161,7 +216,7 @@ static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size, > } > > if (IS_ERR_OR_NULL(p)) > - break; > + goto stop; we could still keep the break here > > /* got a valid next object, increase seq_num */ > bpf_iter_inc_seq_num(seq); > @@ -172,22 +227,16 @@ static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size, > if (num_objs >= MAX_ITER_OBJECTS) { > if (offs == 0) { > err = -EAGAIN; > - seq->op->stop(seq, p); > + do_seq_stop(seq, p, seq->count); > goto done; > } > break; > } > > - err = seq->op->show(seq, p); > - if (err > 0) { > - bpf_iter_dec_seq_num(seq); > - seq->count = offs; > - } else if (err < 0 || seq_has_overflowed(seq)) { > - seq->count = offs; > + err = do_seq_show(seq, p, offs); > + if (err < 0) { > if (offs == 0) { > - if (!err) > - err = -E2BIG; > - seq->op->stop(seq, p); > + do_seq_stop(seq, p, seq->count); > goto done; > } > break; > @@ -197,30 +246,11 @@ static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size, > cond_resched(); > } > stop: > - offs = seq->count; > - /* bpf program called if !p */ > - seq->op->stop(seq, p); > - if (!p) { > - if (!seq_has_overflowed(seq)) { > - bpf_iter_done_stop(seq); > - } else { > - seq->count = offs; > - if (offs == 0) { > - err = -E2BIG; > - goto done; > - } > - } > - } > - > - n = min(seq->count, size); > - err = copy_to_user(buf, seq->buf, n); > - if (err) { > - err = -EFAULT; > + err = do_seq_stop(seq, p, seq->count); > + if (err) > goto done; looks like we tried to copy the data before when stop failed, now it's skipped jirka > - } > - copied = n; > - seq->count -= n; > - seq->from = n; > + > + err = do_copy_to_user(seq, buf, size, &copied); > done: > if (!copied) > copied = err; > -- > 2.37.1.455.g008518b4e5-goog >
diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c index 7e8fd49406f6..39b5b647fdb7 100644 --- a/kernel/bpf/bpf_iter.c +++ b/kernel/bpf/bpf_iter.c @@ -77,6 +77,83 @@ static bool bpf_iter_support_resched(struct seq_file *seq) return iter_priv->tinfo->reg_info->feature & BPF_ITER_RESCHED; } +/* do_copy_to_user, copies seq->buf at offset seq->from to userspace and + * updates corresponding fields in seq. It returns -EFAULT if any error + * happens. The actual number of bytes copied is returned via the argument + * 'copied'. + */ +static int do_copy_to_user(struct seq_file *seq, char __user *buf, size_t size, + size_t *copied) +{ + size_t n; + + n = min(seq->count, size); + if (copy_to_user(buf, seq->buf + seq->from, n)) + return -EFAULT; + + seq->count -= n; + seq->from += n; + *copied = n; + return 0; +} + +/* do_seq_show, shows the given object 'p'. If 'p' is skipped or + * error happens, resets seq->count to 'offs'. + * + * Returns err > 0, indicating show() skips this object. + * Returns err = 0, indicating show() succeeds. + * Returns err < 0, indicating show() fails or overflow happened. + */ +static int do_seq_show(struct seq_file *seq, void *p, size_t offs) +{ + int err; + + WARN_ON(IS_ERR_OR_NULL(p)); + + err = seq->op->show(seq, p); + if (err > 0) { + /* object is skipped, decrease seq_num, so next + * valid object can reuse the same seq_num. + */ + bpf_iter_dec_seq_num(seq); + seq->count = offs; + return err; + } + + if (err < 0 || seq_has_overflowed(seq)) { + seq->count = offs; + return err ? err : -E2BIG; + } + + /* err == 0 and no overflow */ + return 0; +} + +/* do_seq_stop, stops at the given object 'p'. 'p' could be an ERR or NULL. If + * 'p' is an ERR or there was an overflow, reset seq->count to 'offs' and + * returns error. Returns 0 otherwise. + */ +static int do_seq_stop(struct seq_file *seq, void *p, size_t offs) +{ + if (IS_ERR(p)) { + seq->op->stop(seq, NULL); + seq->count = offs; + return PTR_ERR(p); + } + + seq->op->stop(seq, p); + if (!p) { + if (!seq_has_overflowed(seq)) { + bpf_iter_done_stop(seq); + } else { + seq->count = offs; + if (offs == 0) + return -E2BIG; + } + } + return 0; +} + /* maximum visited objects before bailing out */ #define MAX_ITER_OBJECTS 1000000 @@ -91,7 +168,7 @@ static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos) { struct seq_file *seq = file->private_data; - size_t n, offs, copied = 0; + size_t offs, copied = 0; int err = 0, num_objs = 0; bool can_resched; void *p; @@ -108,40 +185,18 @@ static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size, } if (seq->count) { - n = min(seq->count, size); - err = copy_to_user(buf, seq->buf + seq->from, n); - if (err) { - err = -EFAULT; - goto done; - } - seq->count -= n; - seq->from += n; - copied = n; + err = do_copy_to_user(seq, buf, size, &copied); goto done; } seq->from = 0; p = seq->op->start(seq, &seq->index); - if (!p) + if (IS_ERR_OR_NULL(p)) goto stop; - if (IS_ERR(p)) { - err = PTR_ERR(p); - seq->op->stop(seq, p); - seq->count = 0; - goto done; - } - err = seq->op->show(seq, p); - if (err > 0) { - /* object is skipped, decrease seq_num, so next - * valid object can reuse the same seq_num. - */ - bpf_iter_dec_seq_num(seq); - seq->count = 0; - } else if (err < 0 || seq_has_overflowed(seq)) { - if (!err) - err = -E2BIG; - seq->op->stop(seq, p); + err = do_seq_show(seq, p, 0); + if (err < 0) { + do_seq_stop(seq, p, 0); seq->count = 0; goto done; } @@ -153,7 +208,7 @@ static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size, num_objs++; offs = seq->count; p = seq->op->next(seq, p, &seq->index); - if (pos == seq->index) { + if (unlikely(pos == seq->index)) { pr_info_ratelimited("buggy seq_file .next function %ps " "did not updated position index\n", seq->op->next); @@ -161,7 +216,7 @@ static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size, } if (IS_ERR_OR_NULL(p)) - break; + goto stop; /* got a valid next object, increase seq_num */ bpf_iter_inc_seq_num(seq); @@ -172,22 +227,16 @@ static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size, if (num_objs >= MAX_ITER_OBJECTS) { if (offs == 0) { err = -EAGAIN; - seq->op->stop(seq, p); + do_seq_stop(seq, p, seq->count); goto done; } break; } - err = seq->op->show(seq, p); - if (err > 0) { - bpf_iter_dec_seq_num(seq); - seq->count = offs; - } else if (err < 0 || seq_has_overflowed(seq)) { - seq->count = offs; + err = do_seq_show(seq, p, offs); + if (err < 0) { if (offs == 0) { - if (!err) - err = -E2BIG; - seq->op->stop(seq, p); + do_seq_stop(seq, p, seq->count); goto done; } break; @@ -197,30 +246,11 @@ static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size, cond_resched(); } stop: - offs = seq->count; - /* bpf program called if !p */ - seq->op->stop(seq, p); - if (!p) { - if (!seq_has_overflowed(seq)) { - bpf_iter_done_stop(seq); - } else { - seq->count = offs; - if (offs == 0) { - err = -E2BIG; - goto done; - } - } - } - - n = min(seq->count, size); - err = copy_to_user(buf, seq->buf, n); - if (err) { - err = -EFAULT; + err = do_seq_stop(seq, p, seq->count); + if (err) goto done; - } - copied = n; - seq->count -= n; - seq->from = n; + + err = do_copy_to_user(seq, buf, size, &copied); done: if (!copied) copied = err;
Refactor bpf_seq_read() by extracting some common logic into helper functions. I hope this makes bpf_seq_read() more readable. This is a refactoring patch, so no behavior change is expected. Signed-off-by: Hao Luo <haoluo@google.com> --- kernel/bpf/bpf_iter.c | 156 +++++++++++++++++++++++++----------------- 1 file changed, 93 insertions(+), 63 deletions(-)