diff mbox series

[bpf-next,2/5] bpf: add bpf_trace_vprintk helper

Message ID 20210821025837.1614098-3-davemarchevsky@fb.com (mailing list archive)
State Superseded
Delegated to: BPF
Headers show
Series bpf: implement variadic printk helper | expand

Checks

Context Check Description
netdev/cover_letter success Link
netdev/fixes_present success Link
netdev/patch_count success Link
netdev/tree_selection success Clearly marked for bpf-next
netdev/subject_prefix success Link
netdev/cc_maintainers warning 9 maintainers not CCed: kafai@fb.com quentin@isovalent.com songliubraving@fb.com rostedt@goodmis.org mingo@redhat.com haoluo@google.com joe@cilium.io kpsingh@kernel.org john.fastabend@gmail.com
netdev/source_inline success Was 0 now: 0
netdev/verify_signedoff success Link
netdev/module_param success Was 0 now: 0
netdev/build_32bit success Errors and warnings before: 11909 this patch: 11909
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/verify_fixes success Link
netdev/checkpatch warning WARNING: line length of 87 exceeds 80 columns WARNING: line length of 89 exceeds 80 columns WARNING: line length of 90 exceeds 80 columns
netdev/build_allmodconfig_warn success Errors and warnings before: 11431 this patch: 11431
netdev/header_inline success Link

Commit Message

Dave Marchevsky Aug. 21, 2021, 2:58 a.m. UTC
This helper is meant to be "bpf_trace_printk, but with proper vararg
support". Follow bpf_snprintf's example and take a u64 pseudo-vararg
array. Write to dmesg using the same mechanism as bpf_trace_printk.

Signed-off-by: Dave Marchevsky <davemarchevsky@fb.com>
---
 include/linux/bpf.h            |  1 +
 include/uapi/linux/bpf.h       | 23 +++++++++++++++
 kernel/bpf/core.c              |  5 ++++
 kernel/bpf/helpers.c           |  2 ++
 kernel/trace/bpf_trace.c       | 52 +++++++++++++++++++++++++++++++++-
 tools/include/uapi/linux/bpf.h | 23 +++++++++++++++
 6 files changed, 105 insertions(+), 1 deletion(-)

Comments

Andrii Nakryiko Aug. 24, 2021, 4:50 a.m. UTC | #1
On Fri, Aug 20, 2021 at 7:59 PM Dave Marchevsky <davemarchevsky@fb.com> wrote:
>
> This helper is meant to be "bpf_trace_printk, but with proper vararg

We have bpf_snprintf() and bpf_seq_printf() names for other BPF
helpers using the same approach. How about we call this one simply
`bpf_printf`? It will be in line with other naming, it is logical BPF
equivalent of user-space printf (which outputs to stderr, which in BPF
land is /sys/kernel/debug/tracing/trace_pipe). And it will be logical
to have a nice and short BPF_PRINTF() convenience macro provided by
libbpf.

> support". Follow bpf_snprintf's example and take a u64 pseudo-vararg
> array. Write to dmesg using the same mechanism as bpf_trace_printk.

Are you sure about the dmesg part?... bpf_trace_printk is outputting
into /sys/kernel/debug/tracing/trace_pipe.

>
> Signed-off-by: Dave Marchevsky <davemarchevsky@fb.com>
> ---
>  include/linux/bpf.h            |  1 +
>  include/uapi/linux/bpf.h       | 23 +++++++++++++++
>  kernel/bpf/core.c              |  5 ++++
>  kernel/bpf/helpers.c           |  2 ++
>  kernel/trace/bpf_trace.c       | 52 +++++++++++++++++++++++++++++++++-
>  tools/include/uapi/linux/bpf.h | 23 +++++++++++++++
>  6 files changed, 105 insertions(+), 1 deletion(-)
>
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index be8d57e6e78a..b6c45a6cbbba 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -1088,6 +1088,7 @@ bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *f
>  int bpf_prog_calc_tag(struct bpf_prog *fp);
>
>  const struct bpf_func_proto *bpf_get_trace_printk_proto(void);
> +const struct bpf_func_proto *bpf_get_trace_vprintk_proto(void);
>
>  typedef unsigned long (*bpf_ctx_copy_t)(void *dst, const void *src,
>                                         unsigned long off, unsigned long len);
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index c4f7892edb2b..899a2649d986 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -4871,6 +4871,28 @@ union bpf_attr {
>   *     Return
>   *             Value specified by user at BPF link creation/attachment time
>   *             or 0, if it was not specified.
> + *
> + * u64 bpf_trace_vprintk(const char *fmt, u32 fmt_size, const void *data, u32 data_len)
> + *     Description
> + *             Behaves like **bpf_trace_printk**\ () helper, but takes an array of u64
> + *             to format. Supports up to 12 arguments to print in this way.

we didn't specify 12 in the description of bpf_snprintf() or
bpf_seq_printf(), so why start doing that here? For data/args format,
let's just refer to bpf_snprintf() or bpf_seq_printf(), whichever does
a better job explaining this :)


> + *             The *fmt* and *fmt_size* are for the format string itself. The *data* and
> + *             *data_len* are format string arguments.
> + *
> + *             Each format specifier in **fmt** corresponds to one u64 element
> + *             in the **data** array. For strings and pointers where pointees
> + *             are accessed, only the pointer values are stored in the *data*
> + *             array. The *data_len* is the size of *data* in bytes.
> + *             Formats **%s**, **%p{i,I}{4,6}** requires to read kernel memory.
> + *             Reading kernel memory may fail due to either invalid address or
> + *             valid address but requiring a major memory fault. If reading kernel memory
> + *             fails, the string for **%s** will be an empty string, and the ip
> + *             address for **%p{i,I}{4,6}** will be 0. Not returning error to
> + *             bpf program is consistent with what **bpf_trace_printk**\ () does for now.

This is just a copy/paste from other helpers. Let's avoid duplication
and just point people to a description in other helpers.

> + *
> + *     Return
> + *             The number of bytes written to the buffer, or a negative error
> + *             in case of failure.
>   */
>  #define __BPF_FUNC_MAPPER(FN)          \
>         FN(unspec),                     \
> @@ -5048,6 +5070,7 @@ union bpf_attr {
>         FN(timer_cancel),               \
>         FN(get_func_ip),                \
>         FN(get_attach_cookie),          \
> +       FN(trace_vprintk),              \
>         /* */
>
>  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
> diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
> index 91f24c7b38a1..a137c550046c 100644
> --- a/kernel/bpf/core.c
> +++ b/kernel/bpf/core.c
> @@ -2357,6 +2357,11 @@ const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void)
>         return NULL;
>  }
>
> +const struct bpf_func_proto * __weak bpf_get_trace_vprintk_proto(void)
> +{
> +       return NULL;
> +}
> +
>  u64 __weak
>  bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
>                  void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy)
> diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
> index 5ce19b376ef7..863e5ee68558 100644
> --- a/kernel/bpf/helpers.c
> +++ b/kernel/bpf/helpers.c
> @@ -1419,6 +1419,8 @@ bpf_base_func_proto(enum bpf_func_id func_id)
>                 return &bpf_snprintf_btf_proto;
>         case BPF_FUNC_snprintf:
>                 return &bpf_snprintf_proto;
> +       case BPF_FUNC_trace_vprintk:
> +               return bpf_get_trace_vprintk_proto();
>         default:
>                 return NULL;
>         }
> diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
> index 2cf4bfa1ab7b..8b3f1ec9e082 100644
> --- a/kernel/trace/bpf_trace.c
> +++ b/kernel/trace/bpf_trace.c
> @@ -398,7 +398,7 @@ static const struct bpf_func_proto bpf_trace_printk_proto = {
>         .arg2_type      = ARG_CONST_SIZE,
>  };
>
> -const struct bpf_func_proto *bpf_get_trace_printk_proto(void)
> +static __always_inline void __set_printk_clr_event(void)

Please drop __always_inline, we only use __always_inline for
absolutely performance critical routines. Let the compiler decide.

>  {
>         /*
>          * This program might be calling bpf_trace_printk,
> @@ -410,10 +410,58 @@ const struct bpf_func_proto *bpf_get_trace_printk_proto(void)
>          */
>         if (trace_set_clr_event("bpf_trace", "bpf_trace_printk", 1))
>                 pr_warn_ratelimited("could not enable bpf_trace_printk events");
> +}
>
> +const struct bpf_func_proto *bpf_get_trace_printk_proto(void)
> +{
> +       __set_printk_clr_event();
>         return &bpf_trace_printk_proto;
>  }
>
> +BPF_CALL_4(bpf_trace_vprintk, char *, fmt, u32, fmt_size, const void *, data,
> +          u32, data_len)
> +{
> +       static char buf[BPF_TRACE_PRINTK_SIZE];
> +       unsigned long flags;
> +       int ret, num_args;
> +       u32 *bin_args;
> +
> +       if (data_len & 7 || data_len > MAX_BPRINTF_VARARGS * 8 ||
> +           (data_len && !data))
> +               return -EINVAL;
> +       num_args = data_len / 8;
> +
> +       ret = bpf_bprintf_prepare(fmt, fmt_size, data, &bin_args, num_args);
> +       if (ret < 0)
> +               return ret;
> +
> +       raw_spin_lock_irqsave(&trace_printk_lock, flags);
> +       ret = bstr_printf(buf, sizeof(buf), fmt, bin_args);
> +
> +       trace_bpf_trace_printk(buf);
> +       raw_spin_unlock_irqrestore(&trace_printk_lock, flags);
> +
> +       bpf_bprintf_cleanup();
> +
> +       return ret;
> +}
> +
> +static const struct bpf_func_proto bpf_trace_vprintk_proto = {
> +       .func           = bpf_trace_vprintk,
> +       .gpl_only       = true,
> +       .ret_type       = RET_INTEGER,
> +       .arg1_type      = ARG_PTR_TO_MEM,
> +       .arg2_type      = ARG_CONST_SIZE,
> +       .arg3_type      = ARG_PTR_TO_MEM_OR_NULL,
> +       .arg4_type      = ARG_CONST_SIZE_OR_ZERO,
> +};
> +
> +const struct bpf_func_proto *bpf_get_trace_vprintk_proto(void)
> +{
> +       __set_printk_clr_event();
> +       return &bpf_trace_vprintk_proto;
> +}
> +
>  BPF_CALL_5(bpf_seq_printf, struct seq_file *, m, char *, fmt, u32, fmt_size,
>            const void *, data, u32, data_len)
>  {
> @@ -1113,6 +1161,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
>                 return &bpf_snprintf_proto;
>         case BPF_FUNC_get_func_ip:
>                 return &bpf_get_func_ip_proto_tracing;
> +       case BPF_FUNC_trace_vprintk:
> +               return bpf_get_trace_vprintk_proto();
>         default:
>                 return bpf_base_func_proto(func_id);
>         }
> diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
> index c4f7892edb2b..899a2649d986 100644
> --- a/tools/include/uapi/linux/bpf.h
> +++ b/tools/include/uapi/linux/bpf.h
> @@ -4871,6 +4871,28 @@ union bpf_attr {
>   *     Return
>   *             Value specified by user at BPF link creation/attachment time
>   *             or 0, if it was not specified.
> + *
> + * u64 bpf_trace_vprintk(const char *fmt, u32 fmt_size, const void *data, u32 data_len)
> + *     Description
> + *             Behaves like **bpf_trace_printk**\ () helper, but takes an array of u64
> + *             to format. Supports up to 12 arguments to print in this way.
> + *             The *fmt* and *fmt_size* are for the format string itself. The *data* and
> + *             *data_len* are format string arguments.
> + *
> + *             Each format specifier in **fmt** corresponds to one u64 element
> + *             in the **data** array. For strings and pointers where pointees
> + *             are accessed, only the pointer values are stored in the *data*
> + *             array. The *data_len* is the size of *data* in bytes.
> + *             Formats **%s**, **%p{i,I}{4,6}** requires to read kernel memory.
> + *             Reading kernel memory may fail due to either invalid address or
> + *             valid address but requiring a major memory fault. If reading kernel memory
> + *             fails, the string for **%s** will be an empty string, and the ip
> + *             address for **%p{i,I}{4,6}** will be 0. Not returning error to
> + *             bpf program is consistent with what **bpf_trace_printk**\ () does for now.
> + *
> + *     Return
> + *             The number of bytes written to the buffer, or a negative error
> + *             in case of failure.
>   */
>  #define __BPF_FUNC_MAPPER(FN)          \
>         FN(unspec),                     \
> @@ -5048,6 +5070,7 @@ union bpf_attr {
>         FN(timer_cancel),               \
>         FN(get_func_ip),                \
>         FN(get_attach_cookie),          \
> +       FN(trace_vprintk),              \
>         /* */
>
>  /* integer value in 'imm' field of BPF_CALL instruction selects which helper
> --
> 2.30.2
>
Alexei Starovoitov Aug. 24, 2021, 5:57 p.m. UTC | #2
On Mon, Aug 23, 2021 at 9:50 PM Andrii Nakryiko
<andrii.nakryiko@gmail.com> wrote:
>
> On Fri, Aug 20, 2021 at 7:59 PM Dave Marchevsky <davemarchevsky@fb.com> wrote:
> >
> > This helper is meant to be "bpf_trace_printk, but with proper vararg
>
> We have bpf_snprintf() and bpf_seq_printf() names for other BPF
> helpers using the same approach. How about we call this one simply
> `bpf_printf`? It will be in line with other naming, it is logical BPF
> equivalent of user-space printf (which outputs to stderr, which in BPF
> land is /sys/kernel/debug/tracing/trace_pipe). And it will be logical
> to have a nice and short BPF_PRINTF() convenience macro provided by
> libbpf.
>
> > support". Follow bpf_snprintf's example and take a u64 pseudo-vararg
> > array. Write to dmesg using the same mechanism as bpf_trace_printk.
>
> Are you sure about the dmesg part?... bpf_trace_printk is outputting
> into /sys/kernel/debug/tracing/trace_pipe.

Actually I like bpf_trace_vprintk() name, since it makes it obvious that
it's a flavor of bpf_trace_printk() and its quirks that users learned
to deal with.
I would reserve bpf_printf() for the future. We might have standalone
bpf programs in the future (without user space component) and a better
equivalent
of stdin/stdout. clang -target bpf hello_world.c -o a.out; ./a.out
should print to a terminal. Such future hello world in bpf would be
using bpf_printf()
or bpf_dprintf().
Andrii Nakryiko Aug. 24, 2021, 6:02 p.m. UTC | #3
On Tue, Aug 24, 2021 at 10:57 AM Alexei Starovoitov
<alexei.starovoitov@gmail.com> wrote:
>
> On Mon, Aug 23, 2021 at 9:50 PM Andrii Nakryiko
> <andrii.nakryiko@gmail.com> wrote:
> >
> > On Fri, Aug 20, 2021 at 7:59 PM Dave Marchevsky <davemarchevsky@fb.com> wrote:
> > >
> > > This helper is meant to be "bpf_trace_printk, but with proper vararg
> >
> > We have bpf_snprintf() and bpf_seq_printf() names for other BPF
> > helpers using the same approach. How about we call this one simply
> > `bpf_printf`? It will be in line with other naming, it is logical BPF
> > equivalent of user-space printf (which outputs to stderr, which in BPF
> > land is /sys/kernel/debug/tracing/trace_pipe). And it will be logical
> > to have a nice and short BPF_PRINTF() convenience macro provided by
> > libbpf.
> >
> > > support". Follow bpf_snprintf's example and take a u64 pseudo-vararg
> > > array. Write to dmesg using the same mechanism as bpf_trace_printk.
> >
> > Are you sure about the dmesg part?... bpf_trace_printk is outputting
> > into /sys/kernel/debug/tracing/trace_pipe.
>
> Actually I like bpf_trace_vprintk() name, since it makes it obvious that

It's the inconsistency with bpf_snprintf() and bpf_seq_printf() that's
mildly annoying (it's f at the end, and no v- prefix). Maybe
bpf_trace_printf() then? Or is it too close to bpf_trace_printk()? But
either way you would be using BPF_PRINTF() macro for this. And we can
make that macro use bpf_trace_printk() transparently for <3 args, so
that new macro works on old kernels.

> it's a flavor of bpf_trace_printk() and its quirks that users learned
> to deal with.
> I would reserve bpf_printf() for the future. We might have standalone
> bpf programs in the future (without user space component) and a better
> equivalent
> of stdin/stdout. clang -target bpf hello_world.c -o a.out; ./a.out
> should print to a terminal. Such future hello world in bpf would be
> using bpf_printf()
> or bpf_dprintf().
Alexei Starovoitov Aug. 24, 2021, 6:17 p.m. UTC | #4
On Tue, Aug 24, 2021 at 11:02 AM Andrii Nakryiko
<andrii.nakryiko@gmail.com> wrote:
>
> On Tue, Aug 24, 2021 at 10:57 AM Alexei Starovoitov
> <alexei.starovoitov@gmail.com> wrote:
> >
> > On Mon, Aug 23, 2021 at 9:50 PM Andrii Nakryiko
> > <andrii.nakryiko@gmail.com> wrote:
> > >
> > > On Fri, Aug 20, 2021 at 7:59 PM Dave Marchevsky <davemarchevsky@fb.com> wrote:
> > > >
> > > > This helper is meant to be "bpf_trace_printk, but with proper vararg
> > >
> > > We have bpf_snprintf() and bpf_seq_printf() names for other BPF
> > > helpers using the same approach. How about we call this one simply
> > > `bpf_printf`? It will be in line with other naming, it is logical BPF
> > > equivalent of user-space printf (which outputs to stderr, which in BPF
> > > land is /sys/kernel/debug/tracing/trace_pipe). And it will be logical
> > > to have a nice and short BPF_PRINTF() convenience macro provided by
> > > libbpf.
> > >
> > > > support". Follow bpf_snprintf's example and take a u64 pseudo-vararg
> > > > array. Write to dmesg using the same mechanism as bpf_trace_printk.
> > >
> > > Are you sure about the dmesg part?... bpf_trace_printk is outputting
> > > into /sys/kernel/debug/tracing/trace_pipe.
> >
> > Actually I like bpf_trace_vprintk() name, since it makes it obvious that
>
> It's the inconsistency with bpf_snprintf() and bpf_seq_printf() that's
> mildly annoying (it's f at the end, and no v- prefix). Maybe
> bpf_trace_printf() then? Or is it too close to bpf_trace_printk()?

bpf_trace_printf could be ok, but see below.

> But
> either way you would be using BPF_PRINTF() macro for this. And we can
> make that macro use bpf_trace_printk() transparently for <3 args, so
> that new macro works on old kernels.

Cannot we change the existing bpf_printk() macro to work on old and new kernels?
So bpf_printk() would use bpf_trace_printf() on new and
bpf_trace_printk() on old?
I think bpf_trace_vprintk() looks cleaner in this context if we reuse
bpf_printk() macro.
Andrii Nakryiko Aug. 24, 2021, 6:24 p.m. UTC | #5
On Tue, Aug 24, 2021 at 11:17 AM Alexei Starovoitov
<alexei.starovoitov@gmail.com> wrote:
>
> On Tue, Aug 24, 2021 at 11:02 AM Andrii Nakryiko
> <andrii.nakryiko@gmail.com> wrote:
> >
> > On Tue, Aug 24, 2021 at 10:57 AM Alexei Starovoitov
> > <alexei.starovoitov@gmail.com> wrote:
> > >
> > > On Mon, Aug 23, 2021 at 9:50 PM Andrii Nakryiko
> > > <andrii.nakryiko@gmail.com> wrote:
> > > >
> > > > On Fri, Aug 20, 2021 at 7:59 PM Dave Marchevsky <davemarchevsky@fb.com> wrote:
> > > > >
> > > > > This helper is meant to be "bpf_trace_printk, but with proper vararg
> > > >
> > > > We have bpf_snprintf() and bpf_seq_printf() names for other BPF
> > > > helpers using the same approach. How about we call this one simply
> > > > `bpf_printf`? It will be in line with other naming, it is logical BPF
> > > > equivalent of user-space printf (which outputs to stderr, which in BPF
> > > > land is /sys/kernel/debug/tracing/trace_pipe). And it will be logical
> > > > to have a nice and short BPF_PRINTF() convenience macro provided by
> > > > libbpf.
> > > >
> > > > > support". Follow bpf_snprintf's example and take a u64 pseudo-vararg
> > > > > array. Write to dmesg using the same mechanism as bpf_trace_printk.
> > > >
> > > > Are you sure about the dmesg part?... bpf_trace_printk is outputting
> > > > into /sys/kernel/debug/tracing/trace_pipe.
> > >
> > > Actually I like bpf_trace_vprintk() name, since it makes it obvious that
> >
> > It's the inconsistency with bpf_snprintf() and bpf_seq_printf() that's
> > mildly annoying (it's f at the end, and no v- prefix). Maybe
> > bpf_trace_printf() then? Or is it too close to bpf_trace_printk()?
>
> bpf_trace_printf could be ok, but see below.
>
> > But
> > either way you would be using BPF_PRINTF() macro for this. And we can
> > make that macro use bpf_trace_printk() transparently for <3 args, so
> > that new macro works on old kernels.
>
> Cannot we change the existing bpf_printk() macro to work on old and new kernels?

Only if we break backwards compatibility. And I only know how to
detect the presence of new helper with CO-RE, which automatically
makes any BPF program using this macro CO-RE-dependent, which might
not be what users want (vmlinux BTF is still not universally
available). If I could do something like that without breaking change
and without CO-RE, I'd update bpf_printk() to use `const char *fmt`
for format string a long time ago. But adding CO-RE dependency for
bpf_printk() seems like a no-go.

> So bpf_printk() would use bpf_trace_printf() on new and
> bpf_trace_printk() on old?
> I think bpf_trace_vprintk() looks cleaner in this context if we reuse
> bpf_printk() macro.
Alexei Starovoitov Aug. 24, 2021, 9 p.m. UTC | #6
On Tue, Aug 24, 2021 at 11:24 AM Andrii Nakryiko
<andrii.nakryiko@gmail.com> wrote:
>
> On Tue, Aug 24, 2021 at 11:17 AM Alexei Starovoitov
> <alexei.starovoitov@gmail.com> wrote:
> >
> > On Tue, Aug 24, 2021 at 11:02 AM Andrii Nakryiko
> > <andrii.nakryiko@gmail.com> wrote:
> > >
> > > On Tue, Aug 24, 2021 at 10:57 AM Alexei Starovoitov
> > > <alexei.starovoitov@gmail.com> wrote:
> > > >
> > > > On Mon, Aug 23, 2021 at 9:50 PM Andrii Nakryiko
> > > > <andrii.nakryiko@gmail.com> wrote:
> > > > >
> > > > > On Fri, Aug 20, 2021 at 7:59 PM Dave Marchevsky <davemarchevsky@fb.com> wrote:
> > > > > >
> > > > > > This helper is meant to be "bpf_trace_printk, but with proper vararg
> > > > >
> > > > > We have bpf_snprintf() and bpf_seq_printf() names for other BPF
> > > > > helpers using the same approach. How about we call this one simply
> > > > > `bpf_printf`? It will be in line with other naming, it is logical BPF
> > > > > equivalent of user-space printf (which outputs to stderr, which in BPF
> > > > > land is /sys/kernel/debug/tracing/trace_pipe). And it will be logical
> > > > > to have a nice and short BPF_PRINTF() convenience macro provided by
> > > > > libbpf.
> > > > >
> > > > > > support". Follow bpf_snprintf's example and take a u64 pseudo-vararg
> > > > > > array. Write to dmesg using the same mechanism as bpf_trace_printk.
> > > > >
> > > > > Are you sure about the dmesg part?... bpf_trace_printk is outputting
> > > > > into /sys/kernel/debug/tracing/trace_pipe.
> > > >
> > > > Actually I like bpf_trace_vprintk() name, since it makes it obvious that
> > >
> > > It's the inconsistency with bpf_snprintf() and bpf_seq_printf() that's
> > > mildly annoying (it's f at the end, and no v- prefix). Maybe
> > > bpf_trace_printf() then? Or is it too close to bpf_trace_printk()?
> >
> > bpf_trace_printf could be ok, but see below.
> >
> > > But
> > > either way you would be using BPF_PRINTF() macro for this. And we can
> > > make that macro use bpf_trace_printk() transparently for <3 args, so
> > > that new macro works on old kernels.
> >
> > Cannot we change the existing bpf_printk() macro to work on old and new kernels?
>
> Only if we break backwards compatibility. And I only know how to
> detect the presence of new helper with CO-RE, which automatically
> makes any BPF program using this macro CO-RE-dependent, which might
> not be what users want (vmlinux BTF is still not universally
> available). If I could do something like that without breaking change
> and without CO-RE, I'd update bpf_printk() to use `const char *fmt`
> for format string a long time ago. But adding CO-RE dependency for
> bpf_printk() seems like a no-go.

I see. Naming is the hardest.
I think Dave's current choice of lower case bpf_vprintk() macro and
bpf_trace_vprintk()
helper fits the existing bpf_printk/bpf_trace_printk the best.
Yes, it's inconsistent with BPF_SEQ_PRINTF/BPF_SNPRINTF,
but consistent with trace_printk. Whichever way we go it will be inconsistent.
Stylistically I like the lower case macro, since it doesn't scream at me.
Andrii Nakryiko Aug. 24, 2021, 9:24 p.m. UTC | #7
On Tue, Aug 24, 2021 at 2:00 PM Alexei Starovoitov
<alexei.starovoitov@gmail.com> wrote:
>
> On Tue, Aug 24, 2021 at 11:24 AM Andrii Nakryiko
> <andrii.nakryiko@gmail.com> wrote:
> >
> > On Tue, Aug 24, 2021 at 11:17 AM Alexei Starovoitov
> > <alexei.starovoitov@gmail.com> wrote:
> > >
> > > On Tue, Aug 24, 2021 at 11:02 AM Andrii Nakryiko
> > > <andrii.nakryiko@gmail.com> wrote:
> > > >
> > > > On Tue, Aug 24, 2021 at 10:57 AM Alexei Starovoitov
> > > > <alexei.starovoitov@gmail.com> wrote:
> > > > >
> > > > > On Mon, Aug 23, 2021 at 9:50 PM Andrii Nakryiko
> > > > > <andrii.nakryiko@gmail.com> wrote:
> > > > > >
> > > > > > On Fri, Aug 20, 2021 at 7:59 PM Dave Marchevsky <davemarchevsky@fb.com> wrote:
> > > > > > >
> > > > > > > This helper is meant to be "bpf_trace_printk, but with proper vararg
> > > > > >
> > > > > > We have bpf_snprintf() and bpf_seq_printf() names for other BPF
> > > > > > helpers using the same approach. How about we call this one simply
> > > > > > `bpf_printf`? It will be in line with other naming, it is logical BPF
> > > > > > equivalent of user-space printf (which outputs to stderr, which in BPF
> > > > > > land is /sys/kernel/debug/tracing/trace_pipe). And it will be logical
> > > > > > to have a nice and short BPF_PRINTF() convenience macro provided by
> > > > > > libbpf.
> > > > > >
> > > > > > > support". Follow bpf_snprintf's example and take a u64 pseudo-vararg
> > > > > > > array. Write to dmesg using the same mechanism as bpf_trace_printk.
> > > > > >
> > > > > > Are you sure about the dmesg part?... bpf_trace_printk is outputting
> > > > > > into /sys/kernel/debug/tracing/trace_pipe.
> > > > >
> > > > > Actually I like bpf_trace_vprintk() name, since it makes it obvious that
> > > >
> > > > It's the inconsistency with bpf_snprintf() and bpf_seq_printf() that's
> > > > mildly annoying (it's f at the end, and no v- prefix). Maybe
> > > > bpf_trace_printf() then? Or is it too close to bpf_trace_printk()?
> > >
> > > bpf_trace_printf could be ok, but see below.
> > >
> > > > But
> > > > either way you would be using BPF_PRINTF() macro for this. And we can
> > > > make that macro use bpf_trace_printk() transparently for <3 args, so
> > > > that new macro works on old kernels.
> > >
> > > Cannot we change the existing bpf_printk() macro to work on old and new kernels?
> >
> > Only if we break backwards compatibility. And I only know how to
> > detect the presence of new helper with CO-RE, which automatically
> > makes any BPF program using this macro CO-RE-dependent, which might
> > not be what users want (vmlinux BTF is still not universally
> > available). If I could do something like that without breaking change
> > and without CO-RE, I'd update bpf_printk() to use `const char *fmt`
> > for format string a long time ago. But adding CO-RE dependency for
> > bpf_printk() seems like a no-go.
>
> I see. Naming is the hardest.
> I think Dave's current choice of lower case bpf_vprintk() macro and
> bpf_trace_vprintk()
> helper fits the existing bpf_printk/bpf_trace_printk the best.
> Yes, it's inconsistent with BPF_SEQ_PRINTF/BPF_SNPRINTF,
> but consistent with trace_printk. Whichever way we go it will be inconsistent.
> Stylistically I like the lower case macro, since it doesn't scream at me.

Ok, it's fine. Even more so because we don't need a new macro, we can
just extend the existing bpf_printk() macro to automatically pick
bpf_trace_printk() if more than 3 arguments is provided.

Dave, you'll have to solve a bit of a puzzle macro-wise, but it's
possible to use either bpf_trace_printk() or bpf_trace_vprintk()
transparently for the user.

The only downside is that for <3 args, for backwards compatibility,
we'd have to stick to

char ___fmt[] = fmt;

vs more efficient

static const char ___fmt[] = fmt;

But I'm thinking it might be time to finally make this improvement. We
can also allow users to fallback to less efficient ways for really old
kernels with some extra flag, like so

#ifdef BPF_NO_GLOBAL_DATA
char ___fmt[] = fmt;
#else
static const char ___fmt[] = fmt;
#end

Thoughts?
Alexei Starovoitov Aug. 24, 2021, 9:28 p.m. UTC | #8
On Tue, Aug 24, 2021 at 2:24 PM Andrii Nakryiko
<andrii.nakryiko@gmail.com> wrote:
>
> On Tue, Aug 24, 2021 at 2:00 PM Alexei Starovoitov
> <alexei.starovoitov@gmail.com> wrote:
> >
> > On Tue, Aug 24, 2021 at 11:24 AM Andrii Nakryiko
> > <andrii.nakryiko@gmail.com> wrote:
> > >
> > > On Tue, Aug 24, 2021 at 11:17 AM Alexei Starovoitov
> > > <alexei.starovoitov@gmail.com> wrote:
> > > >
> > > > On Tue, Aug 24, 2021 at 11:02 AM Andrii Nakryiko
> > > > <andrii.nakryiko@gmail.com> wrote:
> > > > >
> > > > > On Tue, Aug 24, 2021 at 10:57 AM Alexei Starovoitov
> > > > > <alexei.starovoitov@gmail.com> wrote:
> > > > > >
> > > > > > On Mon, Aug 23, 2021 at 9:50 PM Andrii Nakryiko
> > > > > > <andrii.nakryiko@gmail.com> wrote:
> > > > > > >
> > > > > > > On Fri, Aug 20, 2021 at 7:59 PM Dave Marchevsky <davemarchevsky@fb.com> wrote:
> > > > > > > >
> > > > > > > > This helper is meant to be "bpf_trace_printk, but with proper vararg
> > > > > > >
> > > > > > > We have bpf_snprintf() and bpf_seq_printf() names for other BPF
> > > > > > > helpers using the same approach. How about we call this one simply
> > > > > > > `bpf_printf`? It will be in line with other naming, it is logical BPF
> > > > > > > equivalent of user-space printf (which outputs to stderr, which in BPF
> > > > > > > land is /sys/kernel/debug/tracing/trace_pipe). And it will be logical
> > > > > > > to have a nice and short BPF_PRINTF() convenience macro provided by
> > > > > > > libbpf.
> > > > > > >
> > > > > > > > support". Follow bpf_snprintf's example and take a u64 pseudo-vararg
> > > > > > > > array. Write to dmesg using the same mechanism as bpf_trace_printk.
> > > > > > >
> > > > > > > Are you sure about the dmesg part?... bpf_trace_printk is outputting
> > > > > > > into /sys/kernel/debug/tracing/trace_pipe.
> > > > > >
> > > > > > Actually I like bpf_trace_vprintk() name, since it makes it obvious that
> > > > >
> > > > > It's the inconsistency with bpf_snprintf() and bpf_seq_printf() that's
> > > > > mildly annoying (it's f at the end, and no v- prefix). Maybe
> > > > > bpf_trace_printf() then? Or is it too close to bpf_trace_printk()?
> > > >
> > > > bpf_trace_printf could be ok, but see below.
> > > >
> > > > > But
> > > > > either way you would be using BPF_PRINTF() macro for this. And we can
> > > > > make that macro use bpf_trace_printk() transparently for <3 args, so
> > > > > that new macro works on old kernels.
> > > >
> > > > Cannot we change the existing bpf_printk() macro to work on old and new kernels?
> > >
> > > Only if we break backwards compatibility. And I only know how to
> > > detect the presence of new helper with CO-RE, which automatically
> > > makes any BPF program using this macro CO-RE-dependent, which might
> > > not be what users want (vmlinux BTF is still not universally
> > > available). If I could do something like that without breaking change
> > > and without CO-RE, I'd update bpf_printk() to use `const char *fmt`
> > > for format string a long time ago. But adding CO-RE dependency for
> > > bpf_printk() seems like a no-go.
> >
> > I see. Naming is the hardest.
> > I think Dave's current choice of lower case bpf_vprintk() macro and
> > bpf_trace_vprintk()
> > helper fits the existing bpf_printk/bpf_trace_printk the best.
> > Yes, it's inconsistent with BPF_SEQ_PRINTF/BPF_SNPRINTF,
> > but consistent with trace_printk. Whichever way we go it will be inconsistent.
> > Stylistically I like the lower case macro, since it doesn't scream at me.
>
> Ok, it's fine. Even more so because we don't need a new macro, we can
> just extend the existing bpf_printk() macro to automatically pick
> bpf_trace_printk() if more than 3 arguments is provided.
>
> Dave, you'll have to solve a bit of a puzzle macro-wise, but it's
> possible to use either bpf_trace_printk() or bpf_trace_vprintk()
> transparently for the user.
>
> The only downside is that for <3 args, for backwards compatibility,
> we'd have to stick to
>
> char ___fmt[] = fmt;
>
> vs more efficient
>
> static const char ___fmt[] = fmt;
>
> But I'm thinking it might be time to finally make this improvement. We
> can also allow users to fallback to less efficient ways for really old
> kernels with some extra flag, like so
>
> #ifdef BPF_NO_GLOBAL_DATA
> char ___fmt[] = fmt;
> #else
> static const char ___fmt[] = fmt;
> #end
>
> Thoughts?

+1 from me for the latter assuming macro magic is possible.
diff mbox series

Patch

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index be8d57e6e78a..b6c45a6cbbba 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1088,6 +1088,7 @@  bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *f
 int bpf_prog_calc_tag(struct bpf_prog *fp);
 
 const struct bpf_func_proto *bpf_get_trace_printk_proto(void);
+const struct bpf_func_proto *bpf_get_trace_vprintk_proto(void);
 
 typedef unsigned long (*bpf_ctx_copy_t)(void *dst, const void *src,
 					unsigned long off, unsigned long len);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index c4f7892edb2b..899a2649d986 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -4871,6 +4871,28 @@  union bpf_attr {
  * 	Return
  *		Value specified by user at BPF link creation/attachment time
  *		or 0, if it was not specified.
+ *
+ * u64 bpf_trace_vprintk(const char *fmt, u32 fmt_size, const void *data, u32 data_len)
+ *	Description
+ *		Behaves like **bpf_trace_printk**\ () helper, but takes an array of u64
+ *		to format. Supports up to 12 arguments to print in this way.
+ *		The *fmt* and *fmt_size* are for the format string itself. The *data* and
+ *		*data_len* are format string arguments.
+ *
+ *		Each format specifier in **fmt** corresponds to one u64 element
+ *		in the **data** array. For strings and pointers where pointees
+ *		are accessed, only the pointer values are stored in the *data*
+ *		array. The *data_len* is the size of *data* in bytes.
+ *		Formats **%s**, **%p{i,I}{4,6}** requires to read kernel memory.
+ *		Reading kernel memory may fail due to either invalid address or
+ *		valid address but requiring a major memory fault. If reading kernel memory
+ *		fails, the string for **%s** will be an empty string, and the ip
+ *		address for **%p{i,I}{4,6}** will be 0. Not returning error to
+ *		bpf program is consistent with what **bpf_trace_printk**\ () does for now.
+ *
+ *	Return
+ *		The number of bytes written to the buffer, or a negative error
+ *		in case of failure.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5048,6 +5070,7 @@  union bpf_attr {
 	FN(timer_cancel),		\
 	FN(get_func_ip),		\
 	FN(get_attach_cookie),		\
+	FN(trace_vprintk),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 91f24c7b38a1..a137c550046c 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -2357,6 +2357,11 @@  const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void)
 	return NULL;
 }
 
+const struct bpf_func_proto * __weak bpf_get_trace_vprintk_proto(void)
+{
+	return NULL;
+}
+
 u64 __weak
 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
 		 void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy)
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 5ce19b376ef7..863e5ee68558 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -1419,6 +1419,8 @@  bpf_base_func_proto(enum bpf_func_id func_id)
 		return &bpf_snprintf_btf_proto;
 	case BPF_FUNC_snprintf:
 		return &bpf_snprintf_proto;
+	case BPF_FUNC_trace_vprintk:
+		return bpf_get_trace_vprintk_proto();
 	default:
 		return NULL;
 	}
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 2cf4bfa1ab7b..8b3f1ec9e082 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -398,7 +398,7 @@  static const struct bpf_func_proto bpf_trace_printk_proto = {
 	.arg2_type	= ARG_CONST_SIZE,
 };
 
-const struct bpf_func_proto *bpf_get_trace_printk_proto(void)
+static __always_inline void __set_printk_clr_event(void)
 {
 	/*
 	 * This program might be calling bpf_trace_printk,
@@ -410,10 +410,58 @@  const struct bpf_func_proto *bpf_get_trace_printk_proto(void)
 	 */
 	if (trace_set_clr_event("bpf_trace", "bpf_trace_printk", 1))
 		pr_warn_ratelimited("could not enable bpf_trace_printk events");
+}
 
+const struct bpf_func_proto *bpf_get_trace_printk_proto(void)
+{
+	__set_printk_clr_event();
 	return &bpf_trace_printk_proto;
 }
 
+BPF_CALL_4(bpf_trace_vprintk, char *, fmt, u32, fmt_size, const void *, data,
+	   u32, data_len)
+{
+	static char buf[BPF_TRACE_PRINTK_SIZE];
+	unsigned long flags;
+	int ret, num_args;
+	u32 *bin_args;
+
+	if (data_len & 7 || data_len > MAX_BPRINTF_VARARGS * 8 ||
+	    (data_len && !data))
+		return -EINVAL;
+	num_args = data_len / 8;
+
+	ret = bpf_bprintf_prepare(fmt, fmt_size, data, &bin_args, num_args);
+	if (ret < 0)
+		return ret;
+
+	raw_spin_lock_irqsave(&trace_printk_lock, flags);
+	ret = bstr_printf(buf, sizeof(buf), fmt, bin_args);
+
+	trace_bpf_trace_printk(buf);
+	raw_spin_unlock_irqrestore(&trace_printk_lock, flags);
+
+	bpf_bprintf_cleanup();
+
+	return ret;
+}
+
+static const struct bpf_func_proto bpf_trace_vprintk_proto = {
+	.func		= bpf_trace_vprintk,
+	.gpl_only	= true,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_MEM,
+	.arg2_type	= ARG_CONST_SIZE,
+	.arg3_type	= ARG_PTR_TO_MEM_OR_NULL,
+	.arg4_type	= ARG_CONST_SIZE_OR_ZERO,
+};
+
+const struct bpf_func_proto *bpf_get_trace_vprintk_proto(void)
+{
+	__set_printk_clr_event();
+	return &bpf_trace_vprintk_proto;
+}
+
 BPF_CALL_5(bpf_seq_printf, struct seq_file *, m, char *, fmt, u32, fmt_size,
 	   const void *, data, u32, data_len)
 {
@@ -1113,6 +1161,8 @@  bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_snprintf_proto;
 	case BPF_FUNC_get_func_ip:
 		return &bpf_get_func_ip_proto_tracing;
+	case BPF_FUNC_trace_vprintk:
+		return bpf_get_trace_vprintk_proto();
 	default:
 		return bpf_base_func_proto(func_id);
 	}
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index c4f7892edb2b..899a2649d986 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -4871,6 +4871,28 @@  union bpf_attr {
  * 	Return
  *		Value specified by user at BPF link creation/attachment time
  *		or 0, if it was not specified.
+ *
+ * u64 bpf_trace_vprintk(const char *fmt, u32 fmt_size, const void *data, u32 data_len)
+ *	Description
+ *		Behaves like **bpf_trace_printk**\ () helper, but takes an array of u64
+ *		to format. Supports up to 12 arguments to print in this way.
+ *		The *fmt* and *fmt_size* are for the format string itself. The *data* and
+ *		*data_len* are format string arguments.
+ *
+ *		Each format specifier in **fmt** corresponds to one u64 element
+ *		in the **data** array. For strings and pointers where pointees
+ *		are accessed, only the pointer values are stored in the *data*
+ *		array. The *data_len* is the size of *data* in bytes.
+ *		Formats **%s**, **%p{i,I}{4,6}** requires to read kernel memory.
+ *		Reading kernel memory may fail due to either invalid address or
+ *		valid address but requiring a major memory fault. If reading kernel memory
+ *		fails, the string for **%s** will be an empty string, and the ip
+ *		address for **%p{i,I}{4,6}** will be 0. Not returning error to
+ *		bpf program is consistent with what **bpf_trace_printk**\ () does for now.
+ *
+ *	Return
+ *		The number of bytes written to the buffer, or a negative error
+ *		in case of failure.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5048,6 +5070,7 @@  union bpf_attr {
 	FN(timer_cancel),		\
 	FN(get_func_ip),		\
 	FN(get_attach_cookie),		\
+	FN(trace_vprintk),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper