diff mbox series

[bpf-next,v3,1/6] bpf: Factorize bpf_trace_printk and bpf_seq_printf

Message ID 20210412153754.235500-2-revest@chromium.org (mailing list archive)
State Superseded
Delegated to: BPF
Headers show
Series Add a snprintf eBPF helper | expand

Checks

Context Check Description
netdev/cover_letter success Link
netdev/fixes_present success Link
netdev/patch_count success Link
netdev/tree_selection success Clearly marked for bpf-next
netdev/subject_prefix success Link
netdev/cc_maintainers warning 6 maintainers not CCed: netdev@vger.kernel.org mingo@redhat.com kafai@fb.com rostedt@goodmis.org john.fastabend@gmail.com songliubraving@fb.com
netdev/source_inline success Was 0 now: 0
netdev/verify_signedoff success Link
netdev/module_param success Was 0 now: 0
netdev/build_32bit success Errors and warnings before: 33 this patch: 8
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/verify_fixes success Link
netdev/checkpatch warning CHECK: Alignment should match open parenthesis CHECK: Please use a blank line after function/struct/union/enum declarations WARNING: line length of 81 exceeds 80 columns
netdev/build_allmodconfig_warn success Errors and warnings before: 33 this patch: 8
netdev/header_inline success Link

Commit Message

Florent Revest April 12, 2021, 3:37 p.m. UTC
Two helpers (trace_printk and seq_printf) have very similar
implementations of format string parsing and a third one is coming
(snprintf). To avoid code duplication and make the code easier to
maintain, this moves the operations associated with format string
parsing (validation and argument sanitization) into one generic
function.

The implementation of the two existing helpers already drifted quite a
bit so unifying them entailed a lot of changes:

- bpf_trace_printk always expected fmt[fmt_size] to be the terminating
  NULL character, this is no longer true, the first 0 is terminating.
- bpf_trace_printk now supports %% (which produces the percentage char).
- bpf_trace_printk now skips width formating fields.
- bpf_trace_printk now supports the X modifier (capital hexadecimal).
- bpf_trace_printk now supports %pK, %px, %pB, %pi4, %pI4, %pi6 and %pI6
- argument casting on 32 bit has been simplified into one macro and
  using an enum instead of obscure int increments.

- bpf_seq_printf now uses bpf_trace_copy_string instead of
  strncpy_from_kernel_nofault and handles the %pks %pus specifiers.
- bpf_seq_printf now prints longs correctly on 32 bit architectures.

- both were changed to use a global per-cpu tmp buffer instead of one
  stack buffer for trace_printk and 6 small buffers for seq_printf.
- to avoid per-cpu buffer usage conflict, these helpers disable
  preemption while the per-cpu buffer is in use.
- both helpers now support the %ps and %pS specifiers to print symbols.

Signed-off-by: Florent Revest <revest@chromium.org>
---
 kernel/trace/bpf_trace.c | 529 ++++++++++++++++++---------------------
 1 file changed, 248 insertions(+), 281 deletions(-)

Comments

Andrii Nakryiko April 13, 2021, 11:01 p.m. UTC | #1
On Mon, Apr 12, 2021 at 8:38 AM Florent Revest <revest@chromium.org> wrote:
>
> Two helpers (trace_printk and seq_printf) have very similar
> implementations of format string parsing and a third one is coming
> (snprintf). To avoid code duplication and make the code easier to
> maintain, this moves the operations associated with format string
> parsing (validation and argument sanitization) into one generic
> function.
>
> The implementation of the two existing helpers already drifted quite a
> bit so unifying them entailed a lot of changes:
>
> - bpf_trace_printk always expected fmt[fmt_size] to be the terminating
>   NULL character, this is no longer true, the first 0 is terminating.
> - bpf_trace_printk now supports %% (which produces the percentage char).
> - bpf_trace_printk now skips width formating fields.
> - bpf_trace_printk now supports the X modifier (capital hexadecimal).
> - bpf_trace_printk now supports %pK, %px, %pB, %pi4, %pI4, %pi6 and %pI6
> - argument casting on 32 bit has been simplified into one macro and
>   using an enum instead of obscure int increments.
>
> - bpf_seq_printf now uses bpf_trace_copy_string instead of
>   strncpy_from_kernel_nofault and handles the %pks %pus specifiers.
> - bpf_seq_printf now prints longs correctly on 32 bit architectures.
>
> - both were changed to use a global per-cpu tmp buffer instead of one
>   stack buffer for trace_printk and 6 small buffers for seq_printf.
> - to avoid per-cpu buffer usage conflict, these helpers disable
>   preemption while the per-cpu buffer is in use.
> - both helpers now support the %ps and %pS specifiers to print symbols.
>
> Signed-off-by: Florent Revest <revest@chromium.org>
> ---
>  kernel/trace/bpf_trace.c | 529 ++++++++++++++++++---------------------
>  1 file changed, 248 insertions(+), 281 deletions(-)
>

[...]

> +/* Per-cpu temp buffers which can be used by printf-like helpers for %s or %p
> + */
> +#define MAX_PRINTF_BUF_LEN     512
> +
> +struct bpf_printf_buf {
> +       char tmp_buf[MAX_PRINTF_BUF_LEN];
> +};
> +static DEFINE_PER_CPU(struct bpf_printf_buf, bpf_printf_buf);
> +static DEFINE_PER_CPU(int, bpf_printf_buf_used);
> +
> +static int try_get_fmt_tmp_buf(char **tmp_buf)
>  {
> -       static char buf[BPF_TRACE_PRINTK_SIZE];
> -       unsigned long flags;
> -       va_list ap;
> -       int ret;
> +       struct bpf_printf_buf *bufs = this_cpu_ptr(&bpf_printf_buf);

why doing this_cpu_ptr() if below (if *tmp_buf case), you will not use
it. just a waste of CPU, no?

> +       int used;
>
> -       raw_spin_lock_irqsave(&trace_printk_lock, flags);
> -       va_start(ap, fmt);
> -       ret = vsnprintf(buf, sizeof(buf), fmt, ap);
> -       va_end(ap);
> -       /* vsnprintf() will not append null for zero-length strings */
> -       if (ret == 0)
> -               buf[0] = '\0';
> -       trace_bpf_trace_printk(buf);
> -       raw_spin_unlock_irqrestore(&trace_printk_lock, flags);
> +       if (*tmp_buf)
> +               return 0;
>
> -       return ret;
> +       preempt_disable();
> +       used = this_cpu_inc_return(bpf_printf_buf_used);
> +       if (WARN_ON_ONCE(used > 1)) {
> +               this_cpu_dec(bpf_printf_buf_used);
> +               return -EBUSY;
> +       }

get bufs pointer here instead?

> +       *tmp_buf = bufs->tmp_buf;
> +
> +       return 0;
> +}
> +
> +static void put_fmt_tmp_buf(void)
> +{
> +       if (this_cpu_read(bpf_printf_buf_used)) {
> +               this_cpu_dec(bpf_printf_buf_used);
> +               preempt_enable();
> +       }
>  }
>
>  /*
> - * Only limited trace_printk() conversion specifiers allowed:
> - * %d %i %u %x %ld %li %lu %lx %lld %lli %llu %llx %p %pB %pks %pus %s
> + * bpf_parse_fmt_str - Generic pass on format strings for printf-like helpers
> + *
> + * Returns a negative value if fmt is an invalid format string or 0 otherwise.
> + *
> + * This can be used in two ways:
> + * - Format string verification only: when final_args and mod are NULL
> + * - Arguments preparation: in addition to the above verification, it writes in
> + *   final_args a copy of raw_args where pointers from BPF have been sanitized
> + *   into pointers safe to use by snprintf. This also writes in the mod array
> + *   the size requirement of each argument, usable by BPF_CAST_FMT_ARG for ex.
> + *
> + * In argument preparation mode, if 0 is returned, safe temporary buffers are
> + * allocated and put_fmt_tmp_buf should be called to free them after use.
>   */
> -BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1,
> -          u64, arg2, u64, arg3)
> -{
> -       int i, mod[3] = {}, fmt_cnt = 0;
> -       char buf[64], fmt_ptype;
> -       void *unsafe_ptr = NULL;
> -       bool str_seen = false;
> +int bpf_printf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args,
> +                       u64 *final_args, enum bpf_printf_mod_type *mod,
> +                       u32 num_args)
> +{
> +       int err, i, curr_specifier = 0, copy_size;
> +       char *unsafe_ptr = NULL, *tmp_buf = NULL;
> +       size_t tmp_buf_len = MAX_PRINTF_BUF_LEN;
> +       enum bpf_printf_mod_type current_mod;
> +       u64 current_arg;

naming consistency: current_arg vs curr_specifier? maybe just cur_arg
and cur_spec?

> +       char fmt_ptype;
> +
> +       if ((final_args && !mod) || (mod && !final_args))

nit: same check:

if (!!final_args != !!mod)

> +               return -EINVAL;
>
> -       /*
> -        * bpf_check()->check_func_arg()->check_stack_boundary()
> -        * guarantees that fmt points to bpf program stack,
> -        * fmt_size bytes of it were initialized and fmt_size > 0
> -        */
> -       if (fmt[--fmt_size] != 0)
> +       fmt_size = (strnchr(fmt, fmt_size, 0) - fmt);

extra ()

> +       if (!fmt_size)

hm... strnchr() will return NULL if the character is not found, so
fmt_size will be some non-zero value (due to - fmt), how is this
supposed to work?

some negative tests are clearly missing, it seems, if you didn't catch this


>                 return -EINVAL;
>
> -       /* check format string for allowed specifiers */
>         for (i = 0; i < fmt_size; i++) {
> -               if ((!isprint(fmt[i]) && !isspace(fmt[i])) || !isascii(fmt[i]))
> -                       return -EINVAL;
> +               if ((!isprint(fmt[i]) && !isspace(fmt[i])) || !isascii(fmt[i])) {
> +                       err = -EINVAL;
> +                       goto out;
> +               }
>
>                 if (fmt[i] != '%')
>                         continue;
>
> -               if (fmt_cnt >= 3)
> -                       return -EINVAL;
> +               if (fmt[i + 1] == '%') {
> +                       i++;
> +                       continue;
> +               }
> +
> +               if (curr_specifier >= num_args) {
> +                       err = -EINVAL;
> +                       goto out;
> +               }
>
>                 /* fmt[i] != 0 && fmt[last] == 0, so we can access fmt[i + 1] */

a bit outdated comment, last doesn't exist anymore. I think the
comment is trying to say that fmt[i + 1] can be read because in the
worst case it will be a final zero terminator (which we checked
above).

>                 i++;
> -               if (fmt[i] == 'l') {
> -                       mod[fmt_cnt]++;
> +

[...]

> +       err = 0;
> +out:
> +       put_fmt_tmp_buf();

so you are putting tmp_buf unconditionally, even when there was no
error. That seems wrong? Should this be:

if (err)
    put_fmt_tmp_buf()

?

> +       return err;
> +}
> +

[...]
Florent Revest April 14, 2021, 9:56 a.m. UTC | #2
On Wed, Apr 14, 2021 at 1:01 AM Andrii Nakryiko
<andrii.nakryiko@gmail.com> wrote:
> On Mon, Apr 12, 2021 at 8:38 AM Florent Revest <revest@chromium.org> wrote:
> > +/* Per-cpu temp buffers which can be used by printf-like helpers for %s or %p
> > + */
> > +#define MAX_PRINTF_BUF_LEN     512
> > +
> > +struct bpf_printf_buf {
> > +       char tmp_buf[MAX_PRINTF_BUF_LEN];
> > +};
> > +static DEFINE_PER_CPU(struct bpf_printf_buf, bpf_printf_buf);
> > +static DEFINE_PER_CPU(int, bpf_printf_buf_used);
> > +
> > +static int try_get_fmt_tmp_buf(char **tmp_buf)
> >  {
> > -       static char buf[BPF_TRACE_PRINTK_SIZE];
> > -       unsigned long flags;
> > -       va_list ap;
> > -       int ret;
> > +       struct bpf_printf_buf *bufs = this_cpu_ptr(&bpf_printf_buf);
>
> why doing this_cpu_ptr() if below (if *tmp_buf case), you will not use
> it. just a waste of CPU, no?

Sure I can move it past the conditions.

> > +       int used;
> >
> > -       raw_spin_lock_irqsave(&trace_printk_lock, flags);
> > -       va_start(ap, fmt);
> > -       ret = vsnprintf(buf, sizeof(buf), fmt, ap);
> > -       va_end(ap);
> > -       /* vsnprintf() will not append null for zero-length strings */
> > -       if (ret == 0)
> > -               buf[0] = '\0';
> > -       trace_bpf_trace_printk(buf);
> > -       raw_spin_unlock_irqrestore(&trace_printk_lock, flags);
> > +       if (*tmp_buf)
> > +               return 0;
> >
> > -       return ret;
> > +       preempt_disable();
> > +       used = this_cpu_inc_return(bpf_printf_buf_used);
> > +       if (WARN_ON_ONCE(used > 1)) {
> > +               this_cpu_dec(bpf_printf_buf_used);
> > +               return -EBUSY;
> > +       }
>
> get bufs pointer here instead?

Okay :)

> > +       *tmp_buf = bufs->tmp_buf;
> > +
> > +       return 0;
> > +}
> > +
> > +static void put_fmt_tmp_buf(void)
> > +{
> > +       if (this_cpu_read(bpf_printf_buf_used)) {
> > +               this_cpu_dec(bpf_printf_buf_used);
> > +               preempt_enable();
> > +       }
> >  }
> >
> >  /*
> > - * Only limited trace_printk() conversion specifiers allowed:
> > - * %d %i %u %x %ld %li %lu %lx %lld %lli %llu %llx %p %pB %pks %pus %s
> > + * bpf_parse_fmt_str - Generic pass on format strings for printf-like helpers
> > + *
> > + * Returns a negative value if fmt is an invalid format string or 0 otherwise.
> > + *
> > + * This can be used in two ways:
> > + * - Format string verification only: when final_args and mod are NULL
> > + * - Arguments preparation: in addition to the above verification, it writes in
> > + *   final_args a copy of raw_args where pointers from BPF have been sanitized
> > + *   into pointers safe to use by snprintf. This also writes in the mod array
> > + *   the size requirement of each argument, usable by BPF_CAST_FMT_ARG for ex.
> > + *
> > + * In argument preparation mode, if 0 is returned, safe temporary buffers are
> > + * allocated and put_fmt_tmp_buf should be called to free them after use.
> >   */
> > -BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1,
> > -          u64, arg2, u64, arg3)
> > -{
> > -       int i, mod[3] = {}, fmt_cnt = 0;
> > -       char buf[64], fmt_ptype;
> > -       void *unsafe_ptr = NULL;
> > -       bool str_seen = false;
> > +int bpf_printf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args,
> > +                       u64 *final_args, enum bpf_printf_mod_type *mod,
> > +                       u32 num_args)
> > +{
> > +       int err, i, curr_specifier = 0, copy_size;
> > +       char *unsafe_ptr = NULL, *tmp_buf = NULL;
> > +       size_t tmp_buf_len = MAX_PRINTF_BUF_LEN;
> > +       enum bpf_printf_mod_type current_mod;
> > +       u64 current_arg;
>
> naming consistency: current_arg vs curr_specifier? maybe just cur_arg
> and cur_spec?

Ahah, you're right again :)

> > +       char fmt_ptype;
> > +
> > +       if ((final_args && !mod) || (mod && !final_args))
>
> nit: same check:
>
> if (!!final_args != !!mod)

Fancy! :)

> > +               return -EINVAL;
> >
> > -       /*
> > -        * bpf_check()->check_func_arg()->check_stack_boundary()
> > -        * guarantees that fmt points to bpf program stack,
> > -        * fmt_size bytes of it were initialized and fmt_size > 0
> > -        */
> > -       if (fmt[--fmt_size] != 0)
> > +       fmt_size = (strnchr(fmt, fmt_size, 0) - fmt);
>
> extra ()

Oops!

> > +       if (!fmt_size)
>
> hm... strnchr() will return NULL if the character is not found, so
> fmt_size will be some non-zero value (due to - fmt), how is this
> supposed to work?

Ugh!

> some negative tests are clearly missing, it seems, if you didn't catch this

Agree

> >                 return -EINVAL;
> >
> > -       /* check format string for allowed specifiers */
> >         for (i = 0; i < fmt_size; i++) {
> > -               if ((!isprint(fmt[i]) && !isspace(fmt[i])) || !isascii(fmt[i]))
> > -                       return -EINVAL;
> > +               if ((!isprint(fmt[i]) && !isspace(fmt[i])) || !isascii(fmt[i])) {
> > +                       err = -EINVAL;
> > +                       goto out;
> > +               }
> >
> >                 if (fmt[i] != '%')
> >                         continue;
> >
> > -               if (fmt_cnt >= 3)
> > -                       return -EINVAL;
> > +               if (fmt[i + 1] == '%') {
> > +                       i++;
> > +                       continue;
> > +               }
> > +
> > +               if (curr_specifier >= num_args) {
> > +                       err = -EINVAL;
> > +                       goto out;
> > +               }
> >
> >                 /* fmt[i] != 0 && fmt[last] == 0, so we can access fmt[i + 1] */
>
> a bit outdated comment, last doesn't exist anymore. I think the
> comment is trying to say that fmt[i + 1] can be read because in the
> worst case it will be a final zero terminator (which we checked
> above).

Yes that's the idea. I will rewrite it as a sentence if "last" is confusing.

> > +       err = 0;
> > +out:
> > +       put_fmt_tmp_buf();
>
> so you are putting tmp_buf unconditionally, even when there was no
> error. That seems wrong? Should this be:
>
> if (err)
>     put_fmt_tmp_buf()
>
> ?

Yeah the naming is unfortunate, as discussed in the other patch, I
will rename that to bpf_pintf_cleanup instead. It's not clear from the
name that it only "puts" if the buffer was already gotten.
Florent Revest April 14, 2021, 10:56 a.m. UTC | #3
On Wed, Apr 14, 2021 at 11:56 AM Florent Revest <revest@chromium.org> wrote:
> On Wed, Apr 14, 2021 at 1:01 AM Andrii Nakryiko
> <andrii.nakryiko@gmail.com> wrote:
> > On Mon, Apr 12, 2021 at 8:38 AM Florent Revest <revest@chromium.org> wrote:
> > > +       err = 0;
> > > +out:
> > > +       put_fmt_tmp_buf();
> >
> > so you are putting tmp_buf unconditionally, even when there was no
> > error. That seems wrong? Should this be:
> >
> > if (err)
> >     put_fmt_tmp_buf()
> >
> > ?
>
> Yeah the naming is unfortunate, as discussed in the other patch, I
> will rename that to bpf_pintf_cleanup instead. It's not clear from the
> name that it only "puts" if the buffer was already gotten.

Ah, sorry I see what you meant! Indeed, my mistake. :|
diff mbox series

Patch

diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 0d23755c2747..3ce9aeee6681 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -372,7 +372,7 @@  static const struct bpf_func_proto *bpf_get_probe_write_proto(void)
 	return &bpf_probe_write_user_proto;
 }
 
-static void bpf_trace_copy_string(char *buf, void *unsafe_ptr, char fmt_ptype,
+static int bpf_trace_copy_string(char *buf, void *unsafe_ptr, char fmt_ptype,
 		size_t bufsz)
 {
 	void __user *user_ptr = (__force void __user *)unsafe_ptr;
@@ -382,178 +382,292 @@  static void bpf_trace_copy_string(char *buf, void *unsafe_ptr, char fmt_ptype,
 	switch (fmt_ptype) {
 	case 's':
 #ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
-		if ((unsigned long)unsafe_ptr < TASK_SIZE) {
-			strncpy_from_user_nofault(buf, user_ptr, bufsz);
-			break;
-		}
+		if ((unsigned long)unsafe_ptr < TASK_SIZE)
+			return strncpy_from_user_nofault(buf, user_ptr, bufsz);
 		fallthrough;
 #endif
 	case 'k':
-		strncpy_from_kernel_nofault(buf, unsafe_ptr, bufsz);
-		break;
+		return strncpy_from_kernel_nofault(buf, unsafe_ptr, bufsz);
 	case 'u':
-		strncpy_from_user_nofault(buf, user_ptr, bufsz);
-		break;
+		return strncpy_from_user_nofault(buf, user_ptr, bufsz);
 	}
+
+	return -EINVAL;
 }
 
 static DEFINE_RAW_SPINLOCK(trace_printk_lock);
 
-#define BPF_TRACE_PRINTK_SIZE   1024
+enum bpf_printf_mod_type {
+	BPF_PRINTF_INT,
+	BPF_PRINTF_LONG,
+	BPF_PRINTF_LONG_LONG,
+};
+
+/* Workaround for getting va_list handling working with different argument type
+ * combinations generically for 32 and 64 bit archs.
+ */
+#define BPF_CAST_FMT_ARG(arg_nb, args, mod)				\
+	(mod[arg_nb] == BPF_PRINTF_LONG_LONG ||				\
+	 (mod[arg_nb] == BPF_PRINTF_LONG && __BITS_PER_LONG == 64)	\
+	  ? (u64)args[arg_nb]						\
+	  : (u32)args[arg_nb])
 
-static __printf(1, 0) int bpf_do_trace_printk(const char *fmt, ...)
+/* Per-cpu temp buffers which can be used by printf-like helpers for %s or %p
+ */
+#define MAX_PRINTF_BUF_LEN	512
+
+struct bpf_printf_buf {
+	char tmp_buf[MAX_PRINTF_BUF_LEN];
+};
+static DEFINE_PER_CPU(struct bpf_printf_buf, bpf_printf_buf);
+static DEFINE_PER_CPU(int, bpf_printf_buf_used);
+
+static int try_get_fmt_tmp_buf(char **tmp_buf)
 {
-	static char buf[BPF_TRACE_PRINTK_SIZE];
-	unsigned long flags;
-	va_list ap;
-	int ret;
+	struct bpf_printf_buf *bufs = this_cpu_ptr(&bpf_printf_buf);
+	int used;
 
-	raw_spin_lock_irqsave(&trace_printk_lock, flags);
-	va_start(ap, fmt);
-	ret = vsnprintf(buf, sizeof(buf), fmt, ap);
-	va_end(ap);
-	/* vsnprintf() will not append null for zero-length strings */
-	if (ret == 0)
-		buf[0] = '\0';
-	trace_bpf_trace_printk(buf);
-	raw_spin_unlock_irqrestore(&trace_printk_lock, flags);
+	if (*tmp_buf)
+		return 0;
 
-	return ret;
+	preempt_disable();
+	used = this_cpu_inc_return(bpf_printf_buf_used);
+	if (WARN_ON_ONCE(used > 1)) {
+		this_cpu_dec(bpf_printf_buf_used);
+		return -EBUSY;
+	}
+	*tmp_buf = bufs->tmp_buf;
+
+	return 0;
+}
+
+static void put_fmt_tmp_buf(void)
+{
+	if (this_cpu_read(bpf_printf_buf_used)) {
+		this_cpu_dec(bpf_printf_buf_used);
+		preempt_enable();
+	}
 }
 
 /*
- * Only limited trace_printk() conversion specifiers allowed:
- * %d %i %u %x %ld %li %lu %lx %lld %lli %llu %llx %p %pB %pks %pus %s
+ * bpf_parse_fmt_str - Generic pass on format strings for printf-like helpers
+ *
+ * Returns a negative value if fmt is an invalid format string or 0 otherwise.
+ *
+ * This can be used in two ways:
+ * - Format string verification only: when final_args and mod are NULL
+ * - Arguments preparation: in addition to the above verification, it writes in
+ *   final_args a copy of raw_args where pointers from BPF have been sanitized
+ *   into pointers safe to use by snprintf. This also writes in the mod array
+ *   the size requirement of each argument, usable by BPF_CAST_FMT_ARG for ex.
+ *
+ * In argument preparation mode, if 0 is returned, safe temporary buffers are
+ * allocated and put_fmt_tmp_buf should be called to free them after use.
  */
-BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1,
-	   u64, arg2, u64, arg3)
-{
-	int i, mod[3] = {}, fmt_cnt = 0;
-	char buf[64], fmt_ptype;
-	void *unsafe_ptr = NULL;
-	bool str_seen = false;
+int bpf_printf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args,
+			u64 *final_args, enum bpf_printf_mod_type *mod,
+			u32 num_args)
+{
+	int err, i, curr_specifier = 0, copy_size;
+	char *unsafe_ptr = NULL, *tmp_buf = NULL;
+	size_t tmp_buf_len = MAX_PRINTF_BUF_LEN;
+	enum bpf_printf_mod_type current_mod;
+	u64 current_arg;
+	char fmt_ptype;
+
+	if ((final_args && !mod) || (mod && !final_args))
+		return -EINVAL;
 
-	/*
-	 * bpf_check()->check_func_arg()->check_stack_boundary()
-	 * guarantees that fmt points to bpf program stack,
-	 * fmt_size bytes of it were initialized and fmt_size > 0
-	 */
-	if (fmt[--fmt_size] != 0)
+	fmt_size = (strnchr(fmt, fmt_size, 0) - fmt);
+	if (!fmt_size)
 		return -EINVAL;
 
-	/* check format string for allowed specifiers */
 	for (i = 0; i < fmt_size; i++) {
-		if ((!isprint(fmt[i]) && !isspace(fmt[i])) || !isascii(fmt[i]))
-			return -EINVAL;
+		if ((!isprint(fmt[i]) && !isspace(fmt[i])) || !isascii(fmt[i])) {
+			err = -EINVAL;
+			goto out;
+		}
 
 		if (fmt[i] != '%')
 			continue;
 
-		if (fmt_cnt >= 3)
-			return -EINVAL;
+		if (fmt[i + 1] == '%') {
+			i++;
+			continue;
+		}
+
+		if (curr_specifier >= num_args) {
+			err = -EINVAL;
+			goto out;
+		}
 
 		/* fmt[i] != 0 && fmt[last] == 0, so we can access fmt[i + 1] */
 		i++;
-		if (fmt[i] == 'l') {
-			mod[fmt_cnt]++;
+
+		/* skip optional "[0 +-][num]" width formatting field */
+		while (fmt[i] == '0' || fmt[i] == '+'  || fmt[i] == '-' ||
+		       fmt[i] == ' ')
 			i++;
-		} else if (fmt[i] == 'p') {
-			mod[fmt_cnt]++;
-			if ((fmt[i + 1] == 'k' ||
-			     fmt[i + 1] == 'u') &&
+		if (fmt[i] >= '1' && fmt[i] <= '9') {
+			i++;
+			while (fmt[i] >= '0' && fmt[i] <= '9')
+				i++;
+		}
+
+		if (fmt[i] == 'p') {
+			current_mod = BPF_PRINTF_LONG;
+
+			if ((fmt[i + 1] == 'k' || fmt[i + 1] == 'u') &&
 			    fmt[i + 2] == 's') {
 				fmt_ptype = fmt[i + 1];
 				i += 2;
 				goto fmt_str;
 			}
 
-			if (fmt[i + 1] == 'B') {
-				i++;
+			if (fmt[i + 1] == 0 || isspace(fmt[i + 1]) ||
+			    ispunct(fmt[i + 1]) || fmt[i + 1] == 'K' ||
+			    fmt[i + 1] == 'x' || fmt[i + 1] == 'B' ||
+			    fmt[i + 1] == 's' || fmt[i + 1] == 'S') {
+				/* just kernel pointers */
+				if (final_args)
+					current_arg = raw_args[curr_specifier];
 				goto fmt_next;
 			}
 
-			/* disallow any further format extensions */
-			if (fmt[i + 1] != 0 &&
-			    !isspace(fmt[i + 1]) &&
-			    !ispunct(fmt[i + 1]))
-				return -EINVAL;
+			/* only support "%pI4", "%pi4", "%pI6" and "%pi6". */
+			if ((fmt[i + 1] != 'i' && fmt[i + 1] != 'I') ||
+			    (fmt[i + 2] != '4' && fmt[i + 2] != '6')) {
+				err = -EINVAL;
+				goto out;
+			}
+
+			i += 2;
+			if (!final_args)
+				goto fmt_next;
+
+			if (try_get_fmt_tmp_buf(&tmp_buf)) {
+				err = -EBUSY;
+				goto out;
+			}
+
+			copy_size = (fmt[i + 2] == '4') ? 4 : 16;
+			if (tmp_buf_len < copy_size) {
+				err = -ENOSPC;
+				goto out;
+			}
+
+			unsafe_ptr = (char *)(long)raw_args[curr_specifier];
+			err = copy_from_kernel_nofault(tmp_buf, unsafe_ptr,
+						       copy_size);
+			if (err < 0)
+				memset(tmp_buf, 0, copy_size);
+			current_arg = (u64)(long)tmp_buf;
+			tmp_buf += copy_size;
+			tmp_buf_len -= copy_size;
 
 			goto fmt_next;
 		} else if (fmt[i] == 's') {
-			mod[fmt_cnt]++;
+			current_mod = BPF_PRINTF_LONG;
 			fmt_ptype = fmt[i];
 fmt_str:
-			if (str_seen)
-				/* allow only one '%s' per fmt string */
-				return -EINVAL;
-			str_seen = true;
-
 			if (fmt[i + 1] != 0 &&
 			    !isspace(fmt[i + 1]) &&
-			    !ispunct(fmt[i + 1]))
-				return -EINVAL;
+			    !ispunct(fmt[i + 1])) {
+				err = -EINVAL;
+				goto out;
+			}
 
-			switch (fmt_cnt) {
-			case 0:
-				unsafe_ptr = (void *)(long)arg1;
-				arg1 = (long)buf;
-				break;
-			case 1:
-				unsafe_ptr = (void *)(long)arg2;
-				arg2 = (long)buf;
-				break;
-			case 2:
-				unsafe_ptr = (void *)(long)arg3;
-				arg3 = (long)buf;
-				break;
+			if (!final_args)
+				goto fmt_next;
+
+			if (try_get_fmt_tmp_buf(&tmp_buf)) {
+				err = -EBUSY;
+				goto out;
+			}
+
+			if (!tmp_buf_len) {
+				err = -ENOSPC;
+				goto out;
 			}
 
-			bpf_trace_copy_string(buf, unsafe_ptr, fmt_ptype,
-					sizeof(buf));
+			unsafe_ptr = (char *)(long)raw_args[curr_specifier];
+			err = bpf_trace_copy_string(tmp_buf, unsafe_ptr,
+						    fmt_ptype, tmp_buf_len);
+			if (err < 0) {
+				tmp_buf[0] = '\0';
+				err = 1;
+			}
+
+			current_arg = (u64)(long)tmp_buf;
+			tmp_buf += err;
+			tmp_buf_len -= err;
+
 			goto fmt_next;
 		}
 
+		current_mod = BPF_PRINTF_INT;
+
+		if (fmt[i] == 'l') {
+			current_mod = BPF_PRINTF_LONG;
+			i++;
+		}
 		if (fmt[i] == 'l') {
-			mod[fmt_cnt]++;
+			current_mod = BPF_PRINTF_LONG_LONG;
 			i++;
 		}
 
-		if (fmt[i] != 'i' && fmt[i] != 'd' &&
-		    fmt[i] != 'u' && fmt[i] != 'x')
-			return -EINVAL;
+		if (fmt[i] != 'i' && fmt[i] != 'd' && fmt[i] != 'u' &&
+		    fmt[i] != 'x' && fmt[i] != 'X') {
+			err = -EINVAL;
+			goto out;
+		}
+
+		if (final_args)
+			current_arg = raw_args[curr_specifier];
 fmt_next:
-		fmt_cnt++;
+		if (final_args) {
+			mod[curr_specifier] = current_mod;
+			final_args[curr_specifier] = current_arg;
+		}
+		curr_specifier++;
 	}
 
-/* Horrid workaround for getting va_list handling working with different
- * argument type combinations generically for 32 and 64 bit archs.
- */
-#define __BPF_TP_EMIT()	__BPF_ARG3_TP()
-#define __BPF_TP(...)							\
-	bpf_do_trace_printk(fmt, ##__VA_ARGS__)
-
-#define __BPF_ARG1_TP(...)						\
-	((mod[0] == 2 || (mod[0] == 1 && __BITS_PER_LONG == 64))	\
-	  ? __BPF_TP(arg1, ##__VA_ARGS__)				\
-	  : ((mod[0] == 1 || (mod[0] == 0 && __BITS_PER_LONG == 32))	\
-	      ? __BPF_TP((long)arg1, ##__VA_ARGS__)			\
-	      : __BPF_TP((u32)arg1, ##__VA_ARGS__)))
-
-#define __BPF_ARG2_TP(...)						\
-	((mod[1] == 2 || (mod[1] == 1 && __BITS_PER_LONG == 64))	\
-	  ? __BPF_ARG1_TP(arg2, ##__VA_ARGS__)				\
-	  : ((mod[1] == 1 || (mod[1] == 0 && __BITS_PER_LONG == 32))	\
-	      ? __BPF_ARG1_TP((long)arg2, ##__VA_ARGS__)		\
-	      : __BPF_ARG1_TP((u32)arg2, ##__VA_ARGS__)))
-
-#define __BPF_ARG3_TP(...)						\
-	((mod[2] == 2 || (mod[2] == 1 && __BITS_PER_LONG == 64))	\
-	  ? __BPF_ARG2_TP(arg3, ##__VA_ARGS__)				\
-	  : ((mod[2] == 1 || (mod[2] == 0 && __BITS_PER_LONG == 32))	\
-	      ? __BPF_ARG2_TP((long)arg3, ##__VA_ARGS__)		\
-	      : __BPF_ARG2_TP((u32)arg3, ##__VA_ARGS__)))
-
-	return __BPF_TP_EMIT();
+	err = 0;
+out:
+	put_fmt_tmp_buf();
+	return err;
+}
+
+#define MAX_TRACE_PRINTK_VARARGS	3
+#define BPF_TRACE_PRINTK_SIZE		1024
+
+BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1,
+	   u64, arg2, u64, arg3)
+{
+	u64 args[MAX_TRACE_PRINTK_VARARGS] = { arg1, arg2, arg3 };
+	enum bpf_printf_mod_type mod[MAX_TRACE_PRINTK_VARARGS];
+	static char buf[BPF_TRACE_PRINTK_SIZE];
+	unsigned long flags;
+	int ret;
+
+	ret = bpf_printf_prepare(fmt, fmt_size, args, args, mod,
+				 MAX_TRACE_PRINTK_VARARGS);
+	if (ret < 0)
+		return ret;
+
+	ret = snprintf(buf, sizeof(buf), fmt, BPF_CAST_FMT_ARG(0, args, mod),
+		BPF_CAST_FMT_ARG(1, args, mod), BPF_CAST_FMT_ARG(2, args, mod));
+	/* snprintf() will not append null for zero-length strings */
+	if (ret == 0)
+		buf[0] = '\0';
+
+	raw_spin_lock_irqsave(&trace_printk_lock, flags);
+	trace_bpf_trace_printk(buf);
+	raw_spin_unlock_irqrestore(&trace_printk_lock, flags);
+
+	put_fmt_tmp_buf();
+
+	return ret;
 }
 
 static const struct bpf_func_proto bpf_trace_printk_proto = {
@@ -581,184 +695,37 @@  const struct bpf_func_proto *bpf_get_trace_printk_proto(void)
 }
 
 #define MAX_SEQ_PRINTF_VARARGS		12
-#define MAX_SEQ_PRINTF_MAX_MEMCPY	6
-#define MAX_SEQ_PRINTF_STR_LEN		128
-
-struct bpf_seq_printf_buf {
-	char buf[MAX_SEQ_PRINTF_MAX_MEMCPY][MAX_SEQ_PRINTF_STR_LEN];
-};
-static DEFINE_PER_CPU(struct bpf_seq_printf_buf, bpf_seq_printf_buf);
-static DEFINE_PER_CPU(int, bpf_seq_printf_buf_used);
 
 BPF_CALL_5(bpf_seq_printf, struct seq_file *, m, char *, fmt, u32, fmt_size,
 	   const void *, data, u32, data_len)
 {
-	int err = -EINVAL, fmt_cnt = 0, memcpy_cnt = 0;
-	int i, buf_used, copy_size, num_args;
-	u64 params[MAX_SEQ_PRINTF_VARARGS];
-	struct bpf_seq_printf_buf *bufs;
-	const u64 *args = data;
-
-	buf_used = this_cpu_inc_return(bpf_seq_printf_buf_used);
-	if (WARN_ON_ONCE(buf_used > 1)) {
-		err = -EBUSY;
-		goto out;
-	}
-
-	bufs = this_cpu_ptr(&bpf_seq_printf_buf);
-
-	/*
-	 * bpf_check()->check_func_arg()->check_stack_boundary()
-	 * guarantees that fmt points to bpf program stack,
-	 * fmt_size bytes of it were initialized and fmt_size > 0
-	 */
-	if (fmt[--fmt_size] != 0)
-		goto out;
-
-	if (data_len & 7)
-		goto out;
-
-	for (i = 0; i < fmt_size; i++) {
-		if (fmt[i] == '%') {
-			if (fmt[i + 1] == '%')
-				i++;
-			else if (!data || !data_len)
-				goto out;
-		}
-	}
+	enum bpf_printf_mod_type mod[MAX_SEQ_PRINTF_VARARGS];
+	u64 args[MAX_SEQ_PRINTF_VARARGS];
+	int err, num_args;
 
+	if (data_len & 7 || data_len > MAX_SEQ_PRINTF_VARARGS * 8 ||
+	    (data_len && !data))
+		return -EINVAL;
 	num_args = data_len / 8;
 
-	/* check format string for allowed specifiers */
-	for (i = 0; i < fmt_size; i++) {
-		/* only printable ascii for now. */
-		if ((!isprint(fmt[i]) && !isspace(fmt[i])) || !isascii(fmt[i])) {
-			err = -EINVAL;
-			goto out;
-		}
-
-		if (fmt[i] != '%')
-			continue;
-
-		if (fmt[i + 1] == '%') {
-			i++;
-			continue;
-		}
-
-		if (fmt_cnt >= MAX_SEQ_PRINTF_VARARGS) {
-			err = -E2BIG;
-			goto out;
-		}
-
-		if (fmt_cnt >= num_args) {
-			err = -EINVAL;
-			goto out;
-		}
-
-		/* fmt[i] != 0 && fmt[last] == 0, so we can access fmt[i + 1] */
-		i++;
-
-		/* skip optional "[0 +-][num]" width formating field */
-		while (fmt[i] == '0' || fmt[i] == '+'  || fmt[i] == '-' ||
-		       fmt[i] == ' ')
-			i++;
-		if (fmt[i] >= '1' && fmt[i] <= '9') {
-			i++;
-			while (fmt[i] >= '0' && fmt[i] <= '9')
-				i++;
-		}
-
-		if (fmt[i] == 's') {
-			void *unsafe_ptr;
-
-			/* try our best to copy */
-			if (memcpy_cnt >= MAX_SEQ_PRINTF_MAX_MEMCPY) {
-				err = -E2BIG;
-				goto out;
-			}
-
-			unsafe_ptr = (void *)(long)args[fmt_cnt];
-			err = strncpy_from_kernel_nofault(bufs->buf[memcpy_cnt],
-					unsafe_ptr, MAX_SEQ_PRINTF_STR_LEN);
-			if (err < 0)
-				bufs->buf[memcpy_cnt][0] = '\0';
-			params[fmt_cnt] = (u64)(long)bufs->buf[memcpy_cnt];
-
-			fmt_cnt++;
-			memcpy_cnt++;
-			continue;
-		}
-
-		if (fmt[i] == 'p') {
-			if (fmt[i + 1] == 0 ||
-			    fmt[i + 1] == 'K' ||
-			    fmt[i + 1] == 'x' ||
-			    fmt[i + 1] == 'B') {
-				/* just kernel pointers */
-				params[fmt_cnt] = args[fmt_cnt];
-				fmt_cnt++;
-				continue;
-			}
-
-			/* only support "%pI4", "%pi4", "%pI6" and "%pi6". */
-			if (fmt[i + 1] != 'i' && fmt[i + 1] != 'I') {
-				err = -EINVAL;
-				goto out;
-			}
-			if (fmt[i + 2] != '4' && fmt[i + 2] != '6') {
-				err = -EINVAL;
-				goto out;
-			}
-
-			if (memcpy_cnt >= MAX_SEQ_PRINTF_MAX_MEMCPY) {
-				err = -E2BIG;
-				goto out;
-			}
-
-
-			copy_size = (fmt[i + 2] == '4') ? 4 : 16;
-
-			err = copy_from_kernel_nofault(bufs->buf[memcpy_cnt],
-						(void *) (long) args[fmt_cnt],
-						copy_size);
-			if (err < 0)
-				memset(bufs->buf[memcpy_cnt], 0, copy_size);
-			params[fmt_cnt] = (u64)(long)bufs->buf[memcpy_cnt];
-
-			i += 2;
-			fmt_cnt++;
-			memcpy_cnt++;
-			continue;
-		}
-
-		if (fmt[i] == 'l') {
-			i++;
-			if (fmt[i] == 'l')
-				i++;
-		}
-
-		if (fmt[i] != 'i' && fmt[i] != 'd' &&
-		    fmt[i] != 'u' && fmt[i] != 'x' &&
-		    fmt[i] != 'X') {
-			err = -EINVAL;
-			goto out;
-		}
-
-		params[fmt_cnt] = args[fmt_cnt];
-		fmt_cnt++;
-	}
+	err = bpf_printf_prepare(fmt, fmt_size, data, args, mod, num_args);
+	if (err < 0)
+		return err;
 
 	/* Maximumly we can have MAX_SEQ_PRINTF_VARARGS parameter, just give
 	 * all of them to seq_printf().
 	 */
-	seq_printf(m, fmt, params[0], params[1], params[2], params[3],
-		   params[4], params[5], params[6], params[7], params[8],
-		   params[9], params[10], params[11]);
+	seq_printf(m, fmt, BPF_CAST_FMT_ARG(0, args, mod),
+		BPF_CAST_FMT_ARG(1, args, mod), BPF_CAST_FMT_ARG(2, args, mod),
+		BPF_CAST_FMT_ARG(3, args, mod), BPF_CAST_FMT_ARG(4, args, mod),
+		BPF_CAST_FMT_ARG(5, args, mod), BPF_CAST_FMT_ARG(6, args, mod),
+		BPF_CAST_FMT_ARG(7, args, mod), BPF_CAST_FMT_ARG(8, args, mod),
+		BPF_CAST_FMT_ARG(9, args, mod), BPF_CAST_FMT_ARG(10, args, mod),
+		BPF_CAST_FMT_ARG(11, args, mod));
 
-	err = seq_has_overflowed(m) ? -EOVERFLOW : 0;
-out:
-	this_cpu_dec(bpf_seq_printf_buf_used);
-	return err;
+	put_fmt_tmp_buf();
+
+	return seq_has_overflowed(m) ? -EOVERFLOW : 0;
 }
 
 BTF_ID_LIST_SINGLE(btf_seq_file_ids, struct, seq_file)