Message ID | 20210828052006.1313788-3-davemarchevsky@fb.com (mailing list archive)
---|---
State | Superseded
Delegated to | BPF
Series | bpf: implement variadic printk helper
On Fri, Aug 27, 2021 at 10:20 PM Dave Marchevsky <davemarchevsky@fb.com> wrote:
>
> This helper is meant to be "bpf_trace_printk, but with proper vararg
> support". Follow bpf_snprintf's example and take a u64 pseudo-vararg
> array. Write to /sys/kernel/debug/tracing/trace_pipe using the same
> mechanism as bpf_trace_printk.
>
> Signed-off-by: Dave Marchevsky <davemarchevsky@fb.com>
> ---

LGTM.

Acked-by: Andrii Nakryiko <andrii@kernel.org>

>  include/linux/bpf.h            |  1 +
>  include/uapi/linux/bpf.h       |  9 ++++++
>  kernel/bpf/core.c              |  5 ++++
>  kernel/bpf/helpers.c           |  2 ++
>  kernel/trace/bpf_trace.c       | 52 +++++++++++++++++++++++++++++++++-
>  tools/include/uapi/linux/bpf.h |  9 ++++++
>  6 files changed, 77 insertions(+), 1 deletion(-)
>
> [...]
On 8/28/21 7:20 AM, Dave Marchevsky wrote:
> This helper is meant to be "bpf_trace_printk, but with proper vararg
> support". Follow bpf_snprintf's example and take a u64 pseudo-vararg
> array. Write to /sys/kernel/debug/tracing/trace_pipe using the same
> mechanism as bpf_trace_printk.
>
> Signed-off-by: Dave Marchevsky <davemarchevsky@fb.com>

lgtm, minor comments below:

> ---
>  include/linux/bpf.h            |  1 +
>  include/uapi/linux/bpf.h       |  9 ++++++
>  kernel/bpf/core.c              |  5 ++++
>  kernel/bpf/helpers.c           |  2 ++
>  kernel/trace/bpf_trace.c       | 52 +++++++++++++++++++++++++++++++++-
>  tools/include/uapi/linux/bpf.h |  9 ++++++
>  6 files changed, 77 insertions(+), 1 deletion(-)
>
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index be8d57e6e78a..b6c45a6cbbba 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -1088,6 +1088,7 @@ bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *f
>  int bpf_prog_calc_tag(struct bpf_prog *fp);
>
>  const struct bpf_func_proto *bpf_get_trace_printk_proto(void);
> +const struct bpf_func_proto *bpf_get_trace_vprintk_proto(void);
>
>  typedef unsigned long (*bpf_ctx_copy_t)(void *dst, const void *src,
>  					unsigned long off, unsigned long len);
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 791f31dd0abe..f171d4d33136 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -4877,6 +4877,14 @@ union bpf_attr {
>   *		Get the struct pt_regs associated with **task**.
>   *	Return
>   *		A pointer to struct pt_regs.
> + *
> + * u64 bpf_trace_vprintk(const char *fmt, u32 fmt_size, const void *data, u32 data_len)

s/u64/long/

> + *	Description
> + *		Behaves like **bpf_trace_printk**\ () helper, but takes an array of u64

nit: maybe for users it's more clear from description if you instead mention that data_len
needs to be multiple of 8 bytes? Or somehow mention the relation with data more clearly
resp. which shortcoming it addresses compared to bpf_trace_printk(), so developers can more
easily parse it.

> + *		to format. Arguments are to be used as in **bpf_seq_printf**\ () helper.
> + *	Return
> + *		The number of bytes written to the buffer, or a negative error
> + *		in case of failure.
>   */
[...]
>  	default:
>  		return NULL;
>  	}
> diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
> index 10672ebc63b7..ea8358b0c748 100644
> --- a/kernel/trace/bpf_trace.c
> +++ b/kernel/trace/bpf_trace.c
> @@ -398,7 +398,7 @@ static const struct bpf_func_proto bpf_trace_printk_proto = {
>  	.arg2_type	= ARG_CONST_SIZE,
>  };
>
> -const struct bpf_func_proto *bpf_get_trace_printk_proto(void)
> +static void __set_printk_clr_event(void)
>  {
>  	/*
>  	 * This program might be calling bpf_trace_printk,
> @@ -410,10 +410,58 @@ const struct bpf_func_proto *bpf_get_trace_printk_proto(void)
>  	 */
>  	if (trace_set_clr_event("bpf_trace", "bpf_trace_printk", 1))
>  		pr_warn_ratelimited("could not enable bpf_trace_printk events");
> +}
>
> +const struct bpf_func_proto *bpf_get_trace_printk_proto(void)
> +{
> +	__set_printk_clr_event();
>  	return &bpf_trace_printk_proto;
>  }
>
> +BPF_CALL_4(bpf_trace_vprintk, char *, fmt, u32, fmt_size, const void *, data,
> +	   u32, data_len)
> +{
> +	static char buf[BPF_TRACE_PRINTK_SIZE];
> +	unsigned long flags;
> +	int ret, num_args;
> +	u32 *bin_args;
> +
> +	if (data_len & 7 || data_len > MAX_BPRINTF_VARARGS * 8 ||
> +	    (data_len && !data))
> +		return -EINVAL;
> +	num_args = data_len / 8;
> +
> +	ret = bpf_bprintf_prepare(fmt, fmt_size, data, &bin_args, num_args);
> +	if (ret < 0)
> +		return ret;

Given you have ARG_PTR_TO_MEM_OR_NULL for data, does this gracefully handle the
case where you pass in fmt string containing e.g. %ps but data being NULL? From
reading bpf_bprintf_prepare() looks like it does just fine, but might be nice
to explicitly add a tiny selftest case for it while you're at it.

> +	raw_spin_lock_irqsave(&trace_printk_lock, flags);
> +	ret = bstr_printf(buf, sizeof(buf), fmt, bin_args);
> +
> +	trace_bpf_trace_printk(buf);
> +	raw_spin_unlock_irqrestore(&trace_printk_lock, flags);
> +
> +	bpf_bprintf_cleanup();
> +
> +	return ret;
> +}

Thanks,
Daniel
On 9/3/21 4:00 AM, Daniel Borkmann wrote:
> On 8/28/21 7:20 AM, Dave Marchevsky wrote:
>> This helper is meant to be "bpf_trace_printk, but with proper vararg
>> support". Follow bpf_snprintf's example and take a u64 pseudo-vararg
>> array. Write to /sys/kernel/debug/tracing/trace_pipe using the same
>> mechanism as bpf_trace_printk.
>>
>> Signed-off-by: Dave Marchevsky <davemarchevsky@fb.com>
>
> lgtm, minor comments below:
>
>> ---
>>  include/linux/bpf.h            |  1 +
>>  include/uapi/linux/bpf.h       |  9 ++++++
>>  kernel/bpf/core.c              |  5 ++++
>>  kernel/bpf/helpers.c           |  2 ++
>>  kernel/trace/bpf_trace.c       | 52 +++++++++++++++++++++++++++++++++-
>>  tools/include/uapi/linux/bpf.h |  9 ++++++
>>  6 files changed, 77 insertions(+), 1 deletion(-)
>>
>> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
>> index be8d57e6e78a..b6c45a6cbbba 100644
>> --- a/include/linux/bpf.h
>> +++ b/include/linux/bpf.h
>> @@ -1088,6 +1088,7 @@ bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *f
>>  int bpf_prog_calc_tag(struct bpf_prog *fp);
>>  const struct bpf_func_proto *bpf_get_trace_printk_proto(void);
>> +const struct bpf_func_proto *bpf_get_trace_vprintk_proto(void);
>>  typedef unsigned long (*bpf_ctx_copy_t)(void *dst, const void *src,
>>  					unsigned long off, unsigned long len);
>> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
>> index 791f31dd0abe..f171d4d33136 100644
>> --- a/include/uapi/linux/bpf.h
>> +++ b/include/uapi/linux/bpf.h
>> @@ -4877,6 +4877,14 @@ union bpf_attr {
>>   *		Get the struct pt_regs associated with **task**.
>>   *	Return
>>   *		A pointer to struct pt_regs.
>> + *
>> + * u64 bpf_trace_vprintk(const char *fmt, u32 fmt_size, const void *data, u32 data_len)
>
> s/u64/long/
>
>> + *	Description
>> + *		Behaves like **bpf_trace_printk**\ () helper, but takes an array of u64
>
> nit: maybe for users it's more clear from description if you instead mention that data_len
> needs to be multiple of 8 bytes? Or somehow mention the relation with data more clearly
> resp. which shortcoming it addresses compared to bpf_trace_printk(), so developers can more
> easily parse it.

In a previous review pass, Andrii preferred having bpf_trace_vprintk's
description reference other helpers instead of copy/pasting. So in v5
(patch 9) of this patchset I've added "multiple of 8 bytes" to helper
comments for bpf_seq_printf and bpf_snprintf.

Added a sentence mentioning benefits of vprintk over printk in v5 (patch 3).

>> + *		to format. Arguments are to be used as in **bpf_seq_printf**\ () helper.
>> + *	Return
>> + *		The number of bytes written to the buffer, or a negative error
>> + *		in case of failure.
>>   */
> [...]
>>  	default:
>>  		return NULL;
>>  	}
>> diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
>> index 10672ebc63b7..ea8358b0c748 100644
>> --- a/kernel/trace/bpf_trace.c
>> +++ b/kernel/trace/bpf_trace.c
>> @@ -398,7 +398,7 @@ static const struct bpf_func_proto bpf_trace_printk_proto = {
>>  	.arg2_type	= ARG_CONST_SIZE,
>>  };
>> -const struct bpf_func_proto *bpf_get_trace_printk_proto(void)
>> +static void __set_printk_clr_event(void)
>>  {
>>  	/*
>>  	 * This program might be calling bpf_trace_printk,
>> @@ -410,10 +410,58 @@ const struct bpf_func_proto *bpf_get_trace_printk_proto(void)
>>  	 */
>>  	if (trace_set_clr_event("bpf_trace", "bpf_trace_printk", 1))
>>  		pr_warn_ratelimited("could not enable bpf_trace_printk events");
>> +}
>> +const struct bpf_func_proto *bpf_get_trace_printk_proto(void)
>> +{
>> +	__set_printk_clr_event();
>>  	return &bpf_trace_printk_proto;
>>  }
>> +BPF_CALL_4(bpf_trace_vprintk, char *, fmt, u32, fmt_size, const void *, data,
>> +	   u32, data_len)
>> +{
>> +	static char buf[BPF_TRACE_PRINTK_SIZE];
>> +	unsigned long flags;
>> +	int ret, num_args;
>> +	u32 *bin_args;
>> +
>> +	if (data_len & 7 || data_len > MAX_BPRINTF_VARARGS * 8 ||
>> +	    (data_len && !data))
>> +		return -EINVAL;
>> +	num_args = data_len / 8;
>> +
>> +	ret = bpf_bprintf_prepare(fmt, fmt_size, data, &bin_args, num_args);
>> +	if (ret < 0)
>> +		return ret;
>
> Given you have ARG_PTR_TO_MEM_OR_NULL for data, does this gracefully handle the
> case where you pass in fmt string containing e.g. %ps but data being NULL? From
> reading bpf_bprintf_prepare() looks like it does just fine, but might be nice
> to explicitly add a tiny selftest case for it while you're at it.
>
>> +	raw_spin_lock_irqsave(&trace_printk_lock, flags);
>> +	ret = bstr_printf(buf, sizeof(buf), fmt, bin_args);
>> +
>> +	trace_bpf_trace_printk(buf);
>> +	raw_spin_unlock_irqrestore(&trace_printk_lock, flags);
>> +
>> +	bpf_bprintf_cleanup();
>> +
>> +	return ret;
>> +}
>
> Thanks,
> Daniel
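[Editor's note] For illustration, a minimal selftest-style sketch of the NULL-data case Daniel raises above. This is not the selftest that was actually added to the series; it assumes a libbpf build whose generated helper definitions already declare bpf_trace_vprintk(), and the section, program, and variable names are made up for the example:

```c
// SPDX-License-Identifier: GPL-2.0
/* Hypothetical sketch, not part of this patch or its selftests. */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

long null_data_ret;	/* read back by the userspace half of the test */

SEC("tp/syscalls/sys_enter_nanosleep")
int vprintk_null_data(void *ctx)
{
	static const char fmt[] = "%ps needs an argument\n";

	/* fmt contains a conversion specifier but data is NULL and
	 * data_len is 0; bpf_bprintf_prepare() should reject this with
	 * a negative error instead of dereferencing anything.
	 */
	null_data_ret = bpf_trace_vprintk(fmt, sizeof(fmt), NULL, 0);
	return 0;
}

char _license[] SEC("license") = "GPL";
```

The userspace half of such a test would trigger the tracepoint and assert that null_data_ret came back negative.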
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index be8d57e6e78a..b6c45a6cbbba 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1088,6 +1088,7 @@ bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *f
 int bpf_prog_calc_tag(struct bpf_prog *fp);
 
 const struct bpf_func_proto *bpf_get_trace_printk_proto(void);
+const struct bpf_func_proto *bpf_get_trace_vprintk_proto(void);
 
 typedef unsigned long (*bpf_ctx_copy_t)(void *dst, const void *src,
 					unsigned long off, unsigned long len);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 791f31dd0abe..f171d4d33136 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -4877,6 +4877,14 @@ union bpf_attr {
  *		Get the struct pt_regs associated with **task**.
  *	Return
  *		A pointer to struct pt_regs.
+ *
+ * u64 bpf_trace_vprintk(const char *fmt, u32 fmt_size, const void *data, u32 data_len)
+ *	Description
+ *		Behaves like **bpf_trace_printk**\ () helper, but takes an array of u64
+ *		to format. Arguments are to be used as in **bpf_seq_printf**\ () helper.
+ *	Return
+ *		The number of bytes written to the buffer, or a negative error
+ *		in case of failure.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5055,6 +5063,7 @@ union bpf_attr {
 	FN(get_func_ip),		\
 	FN(get_attach_cookie),		\
 	FN(task_pt_regs),		\
+	FN(trace_vprintk),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 9f4636d021b1..6fddc13fe67f 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -2357,6 +2357,11 @@ const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void)
 	return NULL;
 }
 
+const struct bpf_func_proto * __weak bpf_get_trace_vprintk_proto(void)
+{
+	return NULL;
+}
+
 u64 __weak
 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
 		 void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy)
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 0d969f8501e2..5f34f3dc7166 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -1425,6 +1425,8 @@ bpf_base_func_proto(enum bpf_func_id func_id)
 		return &bpf_snprintf_proto;
 	case BPF_FUNC_task_pt_regs:
 		return &bpf_task_pt_regs_proto;
+	case BPF_FUNC_trace_vprintk:
+		return bpf_get_trace_vprintk_proto();
 	default:
 		return NULL;
 	}
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 10672ebc63b7..ea8358b0c748 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -398,7 +398,7 @@ static const struct bpf_func_proto bpf_trace_printk_proto = {
 	.arg2_type	= ARG_CONST_SIZE,
 };
 
-const struct bpf_func_proto *bpf_get_trace_printk_proto(void)
+static void __set_printk_clr_event(void)
 {
 	/*
 	 * This program might be calling bpf_trace_printk,
@@ -410,10 +410,58 @@ const struct bpf_func_proto *bpf_get_trace_printk_proto(void)
 	 */
 	if (trace_set_clr_event("bpf_trace", "bpf_trace_printk", 1))
 		pr_warn_ratelimited("could not enable bpf_trace_printk events");
+}
+
+const struct bpf_func_proto *bpf_get_trace_printk_proto(void)
+{
+	__set_printk_clr_event();
 	return &bpf_trace_printk_proto;
 }
 
+BPF_CALL_4(bpf_trace_vprintk, char *, fmt, u32, fmt_size, const void *, data,
+	   u32, data_len)
+{
+	static char buf[BPF_TRACE_PRINTK_SIZE];
+	unsigned long flags;
+	int ret, num_args;
+	u32 *bin_args;
+
+	if (data_len & 7 || data_len > MAX_BPRINTF_VARARGS * 8 ||
+	    (data_len && !data))
+		return -EINVAL;
+	num_args = data_len / 8;
+
+	ret = bpf_bprintf_prepare(fmt, fmt_size, data, &bin_args, num_args);
+	if (ret < 0)
+		return ret;
+
+	raw_spin_lock_irqsave(&trace_printk_lock, flags);
+	ret = bstr_printf(buf, sizeof(buf), fmt, bin_args);
+
+	trace_bpf_trace_printk(buf);
+	raw_spin_unlock_irqrestore(&trace_printk_lock, flags);
+
+	bpf_bprintf_cleanup();
+
+	return ret;
+}
+
+static const struct bpf_func_proto bpf_trace_vprintk_proto = {
+	.func		= bpf_trace_vprintk,
+	.gpl_only	= true,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_MEM,
+	.arg2_type	= ARG_CONST_SIZE,
+	.arg3_type	= ARG_PTR_TO_MEM_OR_NULL,
+	.arg4_type	= ARG_CONST_SIZE_OR_ZERO,
+};
+
+const struct bpf_func_proto *bpf_get_trace_vprintk_proto(void)
+{
+	__set_printk_clr_event();
+	return &bpf_trace_vprintk_proto;
+}
+
 BPF_CALL_5(bpf_seq_printf, struct seq_file *, m, char *, fmt, u32, fmt_size,
 	   const void *, data, u32, data_len)
 {
@@ -1130,6 +1178,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_snprintf_proto;
 	case BPF_FUNC_get_func_ip:
 		return &bpf_get_func_ip_proto_tracing;
+	case BPF_FUNC_trace_vprintk:
+		return bpf_get_trace_vprintk_proto();
 	default:
 		return bpf_base_func_proto(func_id);
 	}
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 791f31dd0abe..f171d4d33136 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -4877,6 +4877,14 @@ union bpf_attr {
  *		Get the struct pt_regs associated with **task**.
  *	Return
  *		A pointer to struct pt_regs.
+ *
+ * u64 bpf_trace_vprintk(const char *fmt, u32 fmt_size, const void *data, u32 data_len)
+ *	Description
+ *		Behaves like **bpf_trace_printk**\ () helper, but takes an array of u64
+ *		to format. Arguments are to be used as in **bpf_seq_printf**\ () helper.
+ *	Return
+ *		The number of bytes written to the buffer, or a negative error
+ *		in case of failure.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5055,6 +5063,7 @@ union bpf_attr {
 	FN(get_func_ip),		\
 	FN(get_attach_cookie),		\
 	FN(task_pt_regs),		\
+	FN(trace_vprintk),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
This helper is meant to be "bpf_trace_printk, but with proper vararg
support". Follow bpf_snprintf's example and take a u64 pseudo-vararg
array. Write to /sys/kernel/debug/tracing/trace_pipe using the same
mechanism as bpf_trace_printk.

Signed-off-by: Dave Marchevsky <davemarchevsky@fb.com>
---
 include/linux/bpf.h            |  1 +
 include/uapi/linux/bpf.h       |  9 ++++++
 kernel/bpf/core.c              |  5 ++++
 kernel/bpf/helpers.c           |  2 ++
 kernel/trace/bpf_trace.c       | 52 +++++++++++++++++++++++++++++++++-
 tools/include/uapi/linux/bpf.h |  9 ++++++
 6 files changed, 77 insertions(+), 1 deletion(-)
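[Editor's note] To make the calling convention described above concrete, here is a rough BPF-program-side sketch. It is illustrative only, not part of the patch; it assumes generated libbpf helper definitions that declare bpf_trace_vprintk(), and the attach point and program name are arbitrary:

```c
// SPDX-License-Identifier: GPL-2.0
/* Hypothetical usage sketch, not taken from this series. */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("tp/syscalls/sys_enter_write")
int vprintk_four_args(void *ctx)
{
	static const char fmt[] = "%d %d %d %d\n";
	/* As with bpf_seq_printf()/bpf_snprintf(), each vararg occupies one
	 * u64 slot, so data_len is sizeof(data) and must be a multiple of 8:
	 * here 4 args * 8 bytes = 32, i.e. num_args = 32 / 8 = 4 in the kernel.
	 */
	__u64 data[] = { 1, 2, 3, 4 };

	/* bpf_trace_printk() is limited to three format arguments; the
	 * pseudo-vararg array lifts that limit to MAX_BPRINTF_VARARGS.
	 */
	bpf_trace_vprintk(fmt, sizeof(fmt), data, sizeof(data));
	return 0;
}

char _license[] SEC("license") = "GPL";
```

As with bpf_trace_printk(), the formatted output ends up in /sys/kernel/debug/tracing/trace_pipe.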