diff mbox series

[v4,05/34] vsprintf: %pf(%p)

Message ID 20220620004233.3805-6-kent.overstreet@gmail.com (mailing list archive)
State New
Headers show
Series Printbufs - new data structure for building strings | expand

Commit Message

Kent Overstreet June 20, 2022, 12:42 a.m. UTC
This implements two new format strings: both do the same thing, one more
compatible with current gcc format string checking, the other that we'd
like to standardize:

 %pf(%p) - more compatible
 %(%p)  - more prettier

Both can take variable numbers of arguments, i.e. %(%p,%p,%p).

They're used to indicate that snprintf or pr_buf should interpret the
next argument as a pretty-printer function to call, and subsequent
arguments within the parentheses should be passed to the pretty-printer.

A pretty printer takes as its first argument a printbuf, and then zero
or more pointer arguments - integer arguments are not (currently) supported.

Example usage:

static void foo_to_text(struct printbuf *out, struct foo *foo)
{
	pr_buf(out, "bar=%u baz=%u", foo->bar, foo->baz);
}

printf("%(%p)", foo_to_text, foo);

The goal is to replace most of our %p format extensions with this
interface, and to move pretty-printers out of the core vsprintf.c code -
this will get us better organization and better discoverability (you'll
be able to cscope to pretty printer calls!), as well as eliminate a lot
of dispatch code in vsprintf.c.

Currently, we can only call pretty printers with pointer arguments. This
could be changed to also allow at least integer arguments in the future
by using libffi.

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Reviewed-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 Documentation/core-api/printk-formats.rst | 22 ++++++
 lib/test_printf.c                         | 27 ++++++++
 lib/vsprintf.c                            | 83 ++++++++++++++++++++++-
 3 files changed, 130 insertions(+), 2 deletions(-)

Comments

Rasmus Villemoes June 21, 2022, 7:04 a.m. UTC | #1
On 20/06/2022 02.42, Kent Overstreet wrote:
> This implements two new format strings: both do the same thing, one more
> compatible with current gcc format string checking, the other that we'd
> like to standardize:
> 
>  %pf(%p) - more compatible
>  %(%p)  - more prettier

No.

> diff --git a/Documentation/core-api/printk-formats.rst b/Documentation/core-api/printk-formats.rst
> index 5e89497ba3..8fc0b62af1 100644
> --- a/Documentation/core-api/printk-formats.rst
> +++ b/Documentation/core-api/printk-formats.rst
> @@ -625,6 +625,28 @@ Examples::
>  	%p4cc	Y10  little-endian (0x20303159)
>  	%p4cc	NV12 big-endian (0xb231564e)
>  
> +Calling a pretty printer function
> +---------------------------------
> +
> +::
> +
> +        %pf(%p)     pretty printer function taking one argument
> +        %pf(%p,%p)  pretty printer function taking two arguments
> +
> +For calling generic pretty printers. A pretty printer is a function that takes
> +as its first argument a pointer to a printbuf, and then zero or more additional
> +pointer arguments. For example:
> +
> +        void foo_to_text(struct printbuf *out, struct foo *foo)
> +        {
> +                pr_buf(out, "bar=%u baz=%u", foo->bar, foo->baz);
> +        }
> +
> +        printf("%pf(%p)", foo_to_text, foo);
> +
> +Note that a pretty-printer may not sleep, if called from printk(). If called
> +from pr_buf() or sprintf() there are no such restrictions.

I know what you're trying to say, but if the sprintf() call itself is
from a non-sleepable context this is obviously not true. So please just
make the rule "A pretty-printer must not sleep.". That's much simpler
and less error-prone. Otherwise I guarantee you that somebody is going
to add a sleeping pretty-printer for their own need, use it in a couple
of safe places, and then somebody wants to add a printk() in that driver
and sees "hey, I can get all this state dumped very easily with this
pretty-printer".

> diff --git a/lib/vsprintf.c b/lib/vsprintf.c
> index 7b24714674..5afa74dda5 100644
> --- a/lib/vsprintf.c
> +++ b/lib/vsprintf.c
> @@ -436,7 +436,8 @@ enum format_type {
>  	FORMAT_TYPE_UINT,
>  	FORMAT_TYPE_INT,
>  	FORMAT_TYPE_SIZE_T,
> -	FORMAT_TYPE_PTRDIFF
> +	FORMAT_TYPE_PTRDIFF,
> +	FORMAT_TYPE_FN,
>  };
>  
>  struct printf_spec {
> @@ -2520,7 +2521,16 @@ int format_decode(const char *fmt, struct printf_spec *spec)
>  		return ++fmt - start;
>  
>  	case 'p':
> -		spec->type = FORMAT_TYPE_PTR;
> +		fmt++;
> +		if (fmt[0] == 'f' &&
> +		    fmt[1] == '(') {
> +			fmt += 2;
> +			spec->type = FORMAT_TYPE_FN;
> +		} else
> +			spec->type = FORMAT_TYPE_PTR;
> +		return fmt - start;
> +	case '(':
> +		spec->type = FORMAT_TYPE_FN;
>  		return ++fmt - start;

NAK. Don't implement something that will never be tested nor used.
There's not a snowball's chance in hell that we'll ever build the kernel
without -Wformat.

>  
> +static void call_prt_fn(struct printbuf *out, void *fn, void **fn_args, unsigned nr_args)
> +{
> +	typedef void (*printf_fn_0)(struct printbuf *);
> +	typedef void (*printf_fn_1)(struct printbuf *, void *);
> +	typedef void (*printf_fn_2)(struct printbuf *, void *, void *);
> +	typedef void (*printf_fn_3)(struct printbuf *, void *, void *, void *);
> +	typedef void (*printf_fn_4)(struct printbuf *, void *, void *, void *, void *);
> +	typedef void (*printf_fn_5)(struct printbuf *, void *, void *, void *, void *, void *);
> +	typedef void (*printf_fn_6)(struct printbuf *, void *, void *, void *, void *, void *, void *);
> +	typedef void (*printf_fn_7)(struct printbuf *, void *, void *, void *, void *, void *, void *, void *);
> +	typedef void (*printf_fn_8)(struct printbuf *, void *, void *, void *, void *, void *, void *, void *, void *);

Sorry, but this is way too ugly, and the prospect of at some point in
the future invoking libffi to do something even naster... eww. We do not
need more functions with completely generic prototypes with no
typechecking and making it extremely hard to teach one of our static
analyzers (smatch has some %pX checking) to do that typechecking.

There are at least two ways you can achieve this passing of a variable
number of arguments with proper types.

(1) Each pretty-printer comes with a struct wrapping up its real
arguments and a macro for creating a compound literal passing those
arguments.

struct foo_pp {
  void (*func)(struct printbuf *pb, void *ctx); /* always first */
  int x;
  long y;
};
void foo_pp(struct printbuf *pb, void *ctx)
{
  struct foo_pp *f = ctx;
  pr_printf(pb, "%d %ld", f->x, f->y);
}

#define FOO_PP(_x, _y) (struct foo_pp){.func = foo_pp, .x = (_x), .y = (_y)}

printk("bla bla %pf\n", &FOO_PP(aa, bb));

(2) Let the pretty-printer itself extract the varargs it expects. To
portably pass around a va_list by reference it needs to be wrapped, so
this would be

struct wva { va_list ap; };

void foo_pp(struct printbuf *pb, struct wva *w)
{
  int x = va_arg(w->ap, int);
  long y = va_arg(w->ap, long);
  pr_printf(pb, "%d %ld", x, y);
}

printk("bla bla %pf(%d, %ld)\n", foo_pp, aa, bb)

with the core printf implementation internally using such a wrapped
va_list, and after a %pf( relying on the pretty-printer having consumed
the arguments up until the closing ). It would probably be a good idea
to give the pretty-printer a pointer to that opening '(' or one-past-it
as well so it could do a sanity check.

So the "core" implementation would be (or the printbuf version of the same)

wsnprintf(char *buf, size_t len, const char *fmt, struct wva *w)
{ ... }

while e.g. snprintf and vsnprintf would be

snprintf(char *buf, size_t len, const char *fmt, ...)
{
  struct wva w;
  int ret;
  va_start(w.ap, fmt);
  ret = wsnprintf(buf, len, fmt, &w);
  va_end(w.ap);
  return ret;
}
vsnprintf(char *buf, size_t len, const char *fmt, struct va_list ap)
{
  struct wva w;
  int ret;
  va_copy(w.ap, ap);
  ret = wsnprintf(buf, len, fmt, &w);
  va_end(w.ap);
  return ret;
}

[snprintf could as usual be implemented in terms of vsnprintf, but the
above would eliminate the state copying].

Rasmus
Kent Overstreet June 21, 2022, 7:51 a.m. UTC | #2
On Tue, Jun 21, 2022 at 09:04:38AM +0200, Rasmus Villemoes wrote:
> On 20/06/2022 02.42, Kent Overstreet wrote:
> > +Note that a pretty-printer may not sleep, if called from printk(). If called
> > +from pr_buf() or sprintf() there are no such restrictions.
> 
> I know what you're trying to say, but if the sprintf() call itself is
> from a non-sleepable context this is obviously not true. So please just
> make the rule "A pretty-printer must not sleep.". That's much simpler
> and less error-prone. Otherwise I guarantee you that somebody is going
> to add a sleeping pretty-printer for their own need, use it in a couple
> of safe places, and then somebody wants to add a printk() in that driver
> and sees "hey, I can get all this state dumped very easily with this
> pretty-printer".

Kernel programmers are used to having to consider the context they're in and
what the functions they're calling might do, a pretty-printer being called
indirectly via sprintf() is absolutely no different. 

> >  struct printf_spec {
> > @@ -2520,7 +2521,16 @@ int format_decode(const char *fmt, struct printf_spec *spec)
> >  		return ++fmt - start;
> >  
> >  	case 'p':
> > -		spec->type = FORMAT_TYPE_PTR;
> > +		fmt++;
> > +		if (fmt[0] == 'f' &&
> > +		    fmt[1] == '(') {
> > +			fmt += 2;
> > +			spec->type = FORMAT_TYPE_FN;
> > +		} else
> > +			spec->type = FORMAT_TYPE_PTR;
> > +		return fmt - start;
> > +	case '(':
> > +		spec->type = FORMAT_TYPE_FN;
> >  		return ++fmt - start;
> 
> NAK. Don't implement something that will never be tested nor used.
> There's not a snowball's chance in hell that we'll ever build the kernel
> without -Wformat.

We're not stopping here. Matthew is taking this to WG14 and I'll be working on
adding this functionality to glibc next, and %() is the syntax we intend to take
to the working group.

But the working group is naturally going to want to see that a working
implementation of it exists.

> Sorry, but this is way too ugly, and the prospect of at some point in
> the future invoking libffi to do something even naster... eww. We do not
> need more functions with completely generic prototypes with no
> typechecking and making it extremely hard to teach one of our static
> analyzers (smatch has some %pX checking) to do that typechecking.
> 
> There are at least two ways you can achieve this passing of a variable
> number of arguments with proper types.
> 
> (1) Each pretty-printer comes with a struct wrapping up its real
> arguments and a macro for creating a compound literal passing those
> arguments.
> 
> struct foo_pp {
>   void (*func)(struct printbuf *pb, void *ctx); /* always first */
>   int x;
>   long y;
> };
> void foo_pp(struct printbuf *pb, void *ctx)
> {
>   struct foo_pp *f = ctx;
>   pr_printf(pb, "%d %ld", f->x, f->y);
> }
> 
> #define FOO_PP(_x, _y) (struct foo_pp){.func = foo_pp, .x = (_x), .y = (_y)}
> 
> printk("bla bla %pf\n", &FOO_PP(aa, bb));

Hellllllllll no.

All that's missing right now is gcc checking that the function signature matches
the args specified within the (). That will come. No way in hell am I going to
implement some half baked hacked up macro crap - I intend to do this right.

> (2) Let the pretty-printer itself extract the varargs it expects. To
> portably pass around a va_list by reference it needs to be wrapped, so
> this would be
> 
> struct wva { va_list ap; };
> 
> void foo_pp(struct printbuf *pb, struct wva *w)
> {
>   int x = va_arg(w->ap, int);
>   long y = va_arg(w->ap, long);
>   pr_printf(pb, "%d %ld", x, y);
> }
> 
> printk("bla bla %pf(%d, %ld)\n", foo_pp, aa, bb)
> 
> with the core printf implementation internally using such a wrapped
> va_list, and after a %pf( relying on the pretty-printer having consumed
> the arguments up until the closing ). It would probably be a good idea
> to give the pretty-printer a pointer to that opening '(' or one-past-it
> as well so it could do a sanity check.

Also no. Varargs is terrible in most situations, and should only be used for
functions that actually want to take variable numbers of arguments - that
doesn't apply to pretty printers.
Rasmus Villemoes June 21, 2022, 8:47 a.m. UTC | #3
On 21/06/2022 09.51, Kent Overstreet wrote:

>>>  struct printf_spec {
>>> @@ -2520,7 +2521,16 @@ int format_decode(const char *fmt, struct printf_spec *spec)
>>>  		return ++fmt - start;
>>>  
>>>  	case 'p':
>>> -		spec->type = FORMAT_TYPE_PTR;
>>> +		fmt++;
>>> +		if (fmt[0] == 'f' &&
>>> +		    fmt[1] == '(') {
>>> +			fmt += 2;
>>> +			spec->type = FORMAT_TYPE_FN;
>>> +		} else
>>> +			spec->type = FORMAT_TYPE_PTR;
>>> +		return fmt - start;
>>> +	case '(':
>>> +		spec->type = FORMAT_TYPE_FN;
>>>  		return ++fmt - start;
>>
>> NAK. Don't implement something that will never be tested nor used.
>> There's not a snowball's chance in hell that we'll ever build the kernel
>> without -Wformat.
> 
> We're not stopping here. Matthew is taking this to WG14 and I'll be working on
> adding this functionality to glibc next, and %() is the syntax we intend to take
> to the working group.
> 
> But the working group is naturally going to want to see that a working
> implementation of it exists.

OK. But implementation in glibc, musl and/or the kernel is much much
less interesting than support in gcc and clang for accepting %( in the
first place, and preferably also for not just treating %( as a synonym
for %p but actually checking that the argument is a function pointer and
 that its signature matches the arguments between (). Once _that_ has
landed in gcc 24 and the kernel requirements have been bumped to that
can we consider adding "%(" format strings (and the necessary support).

Yes, I appreciate the chicken-and-egg situation. No, I don't think the
mainline kernel is a suitable place for proof-of-concept and highly
experimental code.

Rasmus
David Laight June 21, 2022, 11:11 a.m. UTC | #4
...
> > +Calling a pretty printer function
> > +---------------------------------
> > +
> > +::
> > +
> > +        %pf(%p)     pretty printer function taking one argument
> > +        %pf(%p,%p)  pretty printer function taking two arguments
> > +
> > +For calling generic pretty printers. A pretty printer is a function that takes
> > +as its first argument a pointer to a printbuf, and then zero or more additional
> > +pointer arguments. For example:
> > +
> > +        void foo_to_text(struct printbuf *out, struct foo *foo)
> > +        {
> > +                pr_buf(out, "bar=%u baz=%u", foo->bar, foo->baz);
> > +        }
> > +
> > +        printf("%pf(%p)", foo_to_text, foo);
> > +
> > +Note that a pretty-printer may not sleep, if called from printk(). If called
> > +from pr_buf() or sprintf() there are no such restrictions.

I've lost the original email :-)

If you are going to implement this foo_to_text() needs to
be passed the field width, precision and flags.

Is there a real use for multiple arguments %pf(%p,%p) that can't
be implemented by requiring the caller use a relay structure?
That (sort of) solves the problem of people expecting to be
able to pass integers though.

An alternative would be using an array of a union type
to pass through the values extracted from the original va_list.

Or pass the first as a pointer (to get the possibility of compile
time format checking and any others as a union[].

	David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)
diff mbox series

Patch

diff --git a/Documentation/core-api/printk-formats.rst b/Documentation/core-api/printk-formats.rst
index 5e89497ba3..8fc0b62af1 100644
--- a/Documentation/core-api/printk-formats.rst
+++ b/Documentation/core-api/printk-formats.rst
@@ -625,6 +625,28 @@  Examples::
 	%p4cc	Y10  little-endian (0x20303159)
 	%p4cc	NV12 big-endian (0xb231564e)
 
+Calling a pretty printer function
+---------------------------------
+
+::
+
+        %pf(%p)     pretty printer function taking one argument
+        %pf(%p,%p)  pretty printer function taking two arguments
+
+For calling generic pretty printers. A pretty printer is a function that takes
+as its first argument a pointer to a printbuf, and then zero or more additional
+pointer arguments. For example:
+
+        void foo_to_text(struct printbuf *out, struct foo *foo)
+        {
+                pr_buf(out, "bar=%u baz=%u", foo->bar, foo->baz);
+        }
+
+        printf("%pf(%p)", foo_to_text, foo);
+
+Note that a pretty-printer may not sleep, if called from printk(). If called
+from pr_buf() or sprintf() there are no such restrictions.
+
 Thanks
 ======
 
diff --git a/lib/test_printf.c b/lib/test_printf.c
index 07309c45f3..e3de52da91 100644
--- a/lib/test_printf.c
+++ b/lib/test_printf.c
@@ -9,6 +9,7 @@ 
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/printk.h>
+#include <linux/printbuf.h>
 #include <linux/random.h>
 #include <linux/rtc.h>
 #include <linux/slab.h>
@@ -783,6 +784,31 @@  test_pointer(void)
 	fourcc_pointer();
 }
 
+static void printf_test_fn_0(struct printbuf *out)
+{
+	prt_str(out, "0");
+}
+
+static void printf_test_fn_1(struct printbuf *out, void *p)
+{
+	int *i = p;
+
+	prt_printf(out, "%i", *i);
+}
+
+static void __init
+test_fn(void)
+{
+	int i = 1;
+
+	test("0", "%pf()",   printf_test_fn_0);
+	test("1", "%pf(%p)", printf_test_fn_1, &i);
+	/*
+	 * Not tested, so we don't fail the build with -Werror:
+	 */
+	//test("1", "%(%p)", printf_test_fn, &i);
+}
+
 static void __init selftest(void)
 {
 	alloced_buffer = kmalloc(BUF_SIZE + 2*PAD_SIZE, GFP_KERNEL);
@@ -794,6 +820,7 @@  static void __init selftest(void)
 	test_number();
 	test_string();
 	test_pointer();
+	test_fn();
 
 	kfree(alloced_buffer);
 }
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 7b24714674..5afa74dda5 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -436,7 +436,8 @@  enum format_type {
 	FORMAT_TYPE_UINT,
 	FORMAT_TYPE_INT,
 	FORMAT_TYPE_SIZE_T,
-	FORMAT_TYPE_PTRDIFF
+	FORMAT_TYPE_PTRDIFF,
+	FORMAT_TYPE_FN,
 };
 
 struct printf_spec {
@@ -2520,7 +2521,16 @@  int format_decode(const char *fmt, struct printf_spec *spec)
 		return ++fmt - start;
 
 	case 'p':
-		spec->type = FORMAT_TYPE_PTR;
+		fmt++;
+		if (fmt[0] == 'f' &&
+		    fmt[1] == '(') {
+			fmt += 2;
+			spec->type = FORMAT_TYPE_FN;
+		} else
+			spec->type = FORMAT_TYPE_PTR;
+		return fmt - start;
+	case '(':
+		spec->type = FORMAT_TYPE_FN;
 		return ++fmt - start;
 
 	case '%':
@@ -2602,6 +2612,49 @@  set_precision(struct printf_spec *spec, int prec)
 	}
 }
 
+static void call_prt_fn(struct printbuf *out, void *fn, void **fn_args, unsigned nr_args)
+{
+	typedef void (*printf_fn_0)(struct printbuf *);
+	typedef void (*printf_fn_1)(struct printbuf *, void *);
+	typedef void (*printf_fn_2)(struct printbuf *, void *, void *);
+	typedef void (*printf_fn_3)(struct printbuf *, void *, void *, void *);
+	typedef void (*printf_fn_4)(struct printbuf *, void *, void *, void *, void *);
+	typedef void (*printf_fn_5)(struct printbuf *, void *, void *, void *, void *, void *);
+	typedef void (*printf_fn_6)(struct printbuf *, void *, void *, void *, void *, void *, void *);
+	typedef void (*printf_fn_7)(struct printbuf *, void *, void *, void *, void *, void *, void *, void *);
+	typedef void (*printf_fn_8)(struct printbuf *, void *, void *, void *, void *, void *, void *, void *, void *);
+
+	switch (nr_args) {
+	case 0:
+		((printf_fn_0)fn)(out);
+		break;
+	case 1:
+		((printf_fn_1)fn)(out, fn_args[0]);
+		break;
+	case 2:
+		((printf_fn_2)fn)(out, fn_args[0], fn_args[1]);
+		break;
+	case 3:
+		((printf_fn_3)fn)(out, fn_args[0], fn_args[1], fn_args[2]);
+		break;
+	case 4:
+		((printf_fn_4)fn)(out, fn_args[0], fn_args[1], fn_args[2], fn_args[3]);
+		break;
+	case 5:
+		((printf_fn_5)fn)(out, fn_args[0], fn_args[1], fn_args[2], fn_args[3], fn_args[4]);
+		break;
+	case 6:
+		((printf_fn_6)fn)(out, fn_args[0], fn_args[1], fn_args[2], fn_args[3], fn_args[4], fn_args[5]);
+		break;
+	case 7:
+		((printf_fn_7)fn)(out, fn_args[0], fn_args[1], fn_args[2], fn_args[3], fn_args[4], fn_args[5], fn_args[6]);
+		break;
+	case 8:
+		((printf_fn_8)fn)(out, fn_args[0], fn_args[1], fn_args[2], fn_args[3], fn_args[4], fn_args[5], fn_args[6], fn_args[7]);
+		break;
+	}
+}
+
 /**
  * prt_vprintf - Format a string, outputting to a printbuf
  * @out: The printbuf to output to
@@ -2665,6 +2718,32 @@  void prt_vprintf(struct printbuf *out, const char *fmt, va_list args)
 				fmt++;
 			break;
 
+		case FORMAT_TYPE_FN: {
+			unsigned nr_args = 0;
+			void *fn_args[8];
+			void *fn = va_arg(args, void *);
+
+			while (*fmt != ')') {
+				if (nr_args) {
+					if (fmt[0] != ',')
+						goto out;
+					fmt++;
+				}
+
+				if (fmt[0] != '%' || fmt[1] != 'p')
+					goto out;
+				fmt += 2;
+
+				if (WARN_ON_ONCE(nr_args == ARRAY_SIZE(fn_args)))
+					goto out;
+				fn_args[nr_args++] = va_arg(args, void *);
+			}
+
+			call_prt_fn(out, fn, fn_args, nr_args);
+			fmt++; /* past trailing ) */
+			break;
+		}
+
 		case FORMAT_TYPE_PERCENT_CHAR:
 			__prt_char(out, '%');
 			break;