diff mbox series

[bpf-next,v1,1/3] libbpf: btf_dump can produce explicitly sized ints

Message ID c37e39653b133b230dee3b393a07b4def697b61a.1644453291.git.delyank@fb.com (mailing list archive)
State Changes Requested
Delegated to: BPF
Headers show
Series Avoid typedef size mismatches in skeletons | expand

Checks

Context Check Description
bpf/vmtest-bpf-next-PR fail PR summary
netdev/tree_selection success Clearly marked for bpf-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Series has a cover letter
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 0 this patch: 0
netdev/cc_maintainers warning 6 maintainers not CCed: kpsingh@kernel.org john.fastabend@gmail.com kafai@fb.com songliubraving@fb.com yhs@fb.com netdev@vger.kernel.org
netdev/build_clang success Errors and warnings before: 0 this patch: 0
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 0 this patch: 0
netdev/checkpatch warning CHECK: Please use a blank line after function/struct/union/enum declarations WARNING: line length of 81 exceeds 80 columns WARNING: line length of 84 exceeds 80 columns WARNING: line length of 86 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 12 this patch: 12
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next fail VM_Test

Commit Message

Delyan Kratunov Feb. 10, 2022, 12:36 a.m. UTC
When emitting type declations, btf_dump can now optionally rename
int types (including typedefs) to standard types with explicit sizes.
Types like pid_t get renamed but types like __u32, char, and _Bool
are left alone to preserve cast semantics in as many pre-existing
programs as possible.

This option is useful when generating data structures on a system where
types may differ due to arch differences or just userspace and bpf program
disagreeing on the definition of a typedef.

Signed-off-by: Delyan Kratunov <delyank@fb.com>
---
 tools/lib/bpf/btf.h      |  4 +-
 tools/lib/bpf/btf_dump.c | 80 +++++++++++++++++++++++++++++++++++++++-
 2 files changed, 82 insertions(+), 2 deletions(-)

--
2.34.1

Comments

Andrii Nakryiko Feb. 10, 2022, 10:35 p.m. UTC | #1
On Wed, Feb 9, 2022 at 4:37 PM Delyan Kratunov <delyank@fb.com> wrote:
>
> When emitting type declations, btf_dump can now optionally rename
> int types (including typedefs) to standard types with explicit sizes.
> Types like pid_t get renamed but types like __u32, char, and _Bool
> are left alone to preserve cast semantics in as many pre-existing
> programs as possible.
>
> This option is useful when generating data structures on a system where
> types may differ due to arch differences or just userspace and bpf program
> disagreeing on the definition of a typedef.
>
> Signed-off-by: Delyan Kratunov <delyank@fb.com>
> ---
>  tools/lib/bpf/btf.h      |  4 +-
>  tools/lib/bpf/btf_dump.c | 80 +++++++++++++++++++++++++++++++++++++++-
>  2 files changed, 82 insertions(+), 2 deletions(-)
>
> diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
> index 951ac7475794..dbd41bf93b13 100644
> --- a/tools/lib/bpf/btf.h
> +++ b/tools/lib/bpf/btf.h
> @@ -347,9 +347,11 @@ struct btf_dump_emit_type_decl_opts {
>         int indent_level;
>         /* strip all the const/volatile/restrict mods */
>         bool strip_mods;
> +       /* normalize int fields to (u)?int(16|32|64)_t types */
> +       bool sizedints;

let's stick to consistent snake_case naming convention

let's also call it what the comment calls it :) "normalize_ints" ?

>         size_t :0;
>  };
> -#define btf_dump_emit_type_decl_opts__last_field strip_mods
> +#define btf_dump_emit_type_decl_opts__last_field sizedints
>
>  LIBBPF_API int
>  btf_dump__emit_type_decl(struct btf_dump *d, __u32 id,
> diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c
> index 07ebe70d3a30..56bafacf1cbd 100644
> --- a/tools/lib/bpf/btf_dump.c
> +++ b/tools/lib/bpf/btf_dump.c
> @@ -81,6 +81,7 @@ struct btf_dump {
>         void *cb_ctx;
>         int ptr_sz;
>         bool strip_mods;
> +       bool sizedints;
>         bool skip_anon_defs;
>         int last_id;
>
> @@ -1130,7 +1131,9 @@ int btf_dump__emit_type_decl(struct btf_dump *d, __u32 id,
>         fname = OPTS_GET(opts, field_name, "");
>         lvl = OPTS_GET(opts, indent_level, 0);
>         d->strip_mods = OPTS_GET(opts, strip_mods, false);
> +       d->sizedints = OPTS_GET(opts, sizedints, false);
>         btf_dump_emit_type_decl(d, id, fname, lvl);
> +       d->sizedints = false;
>         d->strip_mods = false;
>         return 0;
>  }
> @@ -1263,6 +1266,34 @@ static void btf_dump_emit_name(const struct btf_dump *d,
>         btf_dump_printf(d, "%s%s", separate ? " " : "", name);
>  }
>
> +/* Encode custom heurstics to find char types since BTF_INT_CHAR is never set. */

typo: heuristic

> +static bool btf_is_char(const struct btf_dump *d, const struct btf_type *t)
> +{
> +       return btf_is_int(t) &&
> +              t->size == 1 &&
> +              strcmp(btf_name_of(d, t->name_off), "char") == 0;
> +}
> +
> +static bool btf_is_bool(const struct btf_type *t)
> +{
> +       return btf_is_int(t) && (btf_int_encoding(t) & BTF_INT_BOOL);
> +}
> +
> +/* returns true if type is of the '__[su](8|16|32|64)' type */
> +static bool btf_is_kernel_sizedint(const struct btf_dump *d, const struct btf_type *t)
> +{
> +       const char *name = btf_name_of(d, t->name_off);
> +
> +       return strcmp(name, "__s8") == 0 ||
> +              strcmp(name, "__u8") == 0 ||
> +              strcmp(name, "__s16") == 0 ||
> +              strcmp(name, "__u16") == 0 ||
> +              strcmp(name, "__s32") == 0 ||
> +              strcmp(name, "__u32") == 0 ||
> +              strcmp(name, "__s64") == 0 ||
> +              strcmp(name, "__u64") == 0;
> +}

So I thought about this a bit, I see how there could be a difference
of %lld vs %ld and such, but I think normalize should mean normalize
all ints, without any exceptions for kernel aliases. Let's keep the
rule simple: everything but char and bool gets converted to its
corresponding standard integer alias.

Worst case few users might need to adjust their printf specifier after
seeing a compiler warning.

> +
>  static void btf_dump_emit_type_chain(struct btf_dump *d,
>                                      struct id_stack *decls,
>                                      const char *fname, int lvl)
> @@ -1277,10 +1308,12 @@ static void btf_dump_emit_type_chain(struct btf_dump *d,
>          * don't want to prepend space for that last pointer.
>          */
>         bool last_was_ptr = true;
> -       const struct btf_type *t;
> +       const struct btf_type *t, *rest;
>         const char *name;
>         __u16 kind;
>         __u32 id;
> +       __u8 intenc;
> +       int restypeid;
>

same about variable naming conventions

>         while (decls->cnt) {
>                 id = decls->ids[--decls->cnt];
> @@ -1295,8 +1328,51 @@ static void btf_dump_emit_type_chain(struct btf_dump *d,
>                 t = btf__type_by_id(d->btf, id);
>                 kind = btf_kind(t);

just move it after the if () to not re-assign kind again inside the if

>
> +               /* If we're asked to produce stdint declarations, we need
> +                * to only do that in the following cases:
> +                *  - int types other than char and _Bool

let's make it the only case

> +                *  - typedefs to int types (including char and _Bool) except
> +                *    kernel types like __s16/__u32/etc.
> +                *
> +                * If a typedef resolves to char or _Bool, we do want to use
> +                * the resolved type instead of the stdint types (i.e. char
> +                * instead of int8_t) because the stdint types are explicitly
> +                * signed/unsigned, which affects pointer casts.
> +                *
> +                * If the typedef is of the __s32 variety, we leave it as-is
> +                * due to incompatibilities in e.g. s64 vs int64_t definitions
> +                * (one is `long long` on x86_64 and the other is not).
> +                *
> +                * Unfortunately, the BTF type info never includes BTF_INT_CHAR,
> +                * so we use a size comparison to avoid chars and
> +                * BTF_INT_BOOL to avoid bools.
> +                */
> +               if (d->sizedints && kind == BTF_KIND_TYPEDEF &&
> +                   !btf_is_kernel_sizedint(d, t)) {
> +                       restypeid = btf__resolve_type(d->btf, id);

we use skip_mods_and_typedefs() everywhere for this, it returns
btf_type (and optionally type_id as well), much more convenient (and
it can't fail, so no need for extra error handling)

also please use btf_is_typedef(t)

> +                       if (restypeid >= 0) {
> +                               rest = btf__type_by_id(d->btf, restypeid);
> +                               if (rest && btf_is_int(rest)) {
> +                                       t = rest;
> +                                       kind = btf_kind(rest);
> +                               }
> +                       }
> +               }
> +
>                 switch (kind) {
>                 case BTF_KIND_INT:
> +                       btf_dump_emit_mods(d, decls);
> +                       if (d->sizedints && !btf_is_bool(t) && !btf_is_char(d, t)) {
> +                               intenc = btf_int_encoding(t);
> +                               btf_dump_printf(d,
> +                                               intenc & BTF_INT_SIGNED ?
> +                                               "int%d_t" : "uint%d_t",
> +                                               t->size * 8);
> +                       } else {
> +                               name = btf_name_of(d, t->name_off);
> +                               btf_dump_printf(d, "%s", name);
> +                       }
> +                       break;
>                 case BTF_KIND_FLOAT:
>                         btf_dump_emit_mods(d, decls);
>                         name = btf_name_of(d, t->name_off);
> @@ -1469,7 +1545,9 @@ static void btf_dump_emit_type_cast(struct btf_dump *d, __u32 id,
>
>         d->skip_anon_defs = true;
>         d->strip_mods = true;
> +       d->sizedints = true;

this is a different use case, let's not normalize anything unconditionally

>         btf_dump_emit_type_decl(d, id, "", 0);
> +       d->sizedints = false;
>         d->strip_mods = false;
>         d->skip_anon_defs = false;
>
> --
> 2.34.1
Delyan Kratunov Feb. 10, 2022, 11:40 p.m. UTC | #2
On Thu, 2022-02-10 at 14:35 -0800, Andrii Nakryiko wrote:
> let's stick to consistent snake_case naming convention
> 
> let's also call it what the comment calls it :) "normalize_ints" ?
> 
Sure, naming is hard.

> So I thought about this a bit, I see how there could be a difference
> of %lld vs %ld and such, but I think normalize should mean normalize
> all ints, without any exceptions for kernel aliases. Let's keep the
> rule simple: everything but char and bool gets converted to its
> corresponding standard integer alias.
> 
> Worst case few users might need to adjust their printf specifier after
> seeing a compiler warning.

When I first tried to do this, there were a couple of ways in which it was
problematic:

1. format specifiers are all over selftests
2. passing uint64_t* to things expecting u64* in the userspace code

Ultimately, this felt like a good compromise between keeping existing code
working and also avoiding the major pitfalls of non-sized types.
Another question here is whether a minor release of libbpf should be allowed to
break the build of applications using -Werror.

We could potentially gate this behavior behind a runtime arg but that's not
exactly a "pit of success" type of design.

> this is a different use case, let's not normalize anything unconditionally

My bad, meant to remove this before submitting.
diff mbox series

Patch

diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
index 951ac7475794..dbd41bf93b13 100644
--- a/tools/lib/bpf/btf.h
+++ b/tools/lib/bpf/btf.h
@@ -347,9 +347,11 @@  struct btf_dump_emit_type_decl_opts {
 	int indent_level;
 	/* strip all the const/volatile/restrict mods */
 	bool strip_mods;
+	/* normalize int fields to (u)?int(16|32|64)_t types */
+	bool sizedints;
 	size_t :0;
 };
-#define btf_dump_emit_type_decl_opts__last_field strip_mods
+#define btf_dump_emit_type_decl_opts__last_field sizedints

 LIBBPF_API int
 btf_dump__emit_type_decl(struct btf_dump *d, __u32 id,
diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c
index 07ebe70d3a30..56bafacf1cbd 100644
--- a/tools/lib/bpf/btf_dump.c
+++ b/tools/lib/bpf/btf_dump.c
@@ -81,6 +81,7 @@  struct btf_dump {
 	void *cb_ctx;
 	int ptr_sz;
 	bool strip_mods;
+	bool sizedints;
 	bool skip_anon_defs;
 	int last_id;

@@ -1130,7 +1131,9 @@  int btf_dump__emit_type_decl(struct btf_dump *d, __u32 id,
 	fname = OPTS_GET(opts, field_name, "");
 	lvl = OPTS_GET(opts, indent_level, 0);
 	d->strip_mods = OPTS_GET(opts, strip_mods, false);
+	d->sizedints = OPTS_GET(opts, sizedints, false);
 	btf_dump_emit_type_decl(d, id, fname, lvl);
+	d->sizedints = false;
 	d->strip_mods = false;
 	return 0;
 }
@@ -1263,6 +1266,34 @@  static void btf_dump_emit_name(const struct btf_dump *d,
 	btf_dump_printf(d, "%s%s", separate ? " " : "", name);
 }

+/* Encode custom heurstics to find char types since BTF_INT_CHAR is never set. */
+static bool btf_is_char(const struct btf_dump *d, const struct btf_type *t)
+{
+	return btf_is_int(t) &&
+	       t->size == 1 &&
+	       strcmp(btf_name_of(d, t->name_off), "char") == 0;
+}
+
+static bool btf_is_bool(const struct btf_type *t)
+{
+	return btf_is_int(t) && (btf_int_encoding(t) & BTF_INT_BOOL);
+}
+
+/* returns true if type is of the '__[su](8|16|32|64)' type */
+static bool btf_is_kernel_sizedint(const struct btf_dump *d, const struct btf_type *t)
+{
+	const char *name = btf_name_of(d, t->name_off);
+
+	return strcmp(name, "__s8") == 0 ||
+	       strcmp(name, "__u8") == 0 ||
+	       strcmp(name, "__s16") == 0 ||
+	       strcmp(name, "__u16") == 0 ||
+	       strcmp(name, "__s32") == 0 ||
+	       strcmp(name, "__u32") == 0 ||
+	       strcmp(name, "__s64") == 0 ||
+	       strcmp(name, "__u64") == 0;
+}
+
 static void btf_dump_emit_type_chain(struct btf_dump *d,
 				     struct id_stack *decls,
 				     const char *fname, int lvl)
@@ -1277,10 +1308,12 @@  static void btf_dump_emit_type_chain(struct btf_dump *d,
 	 * don't want to prepend space for that last pointer.
 	 */
 	bool last_was_ptr = true;
-	const struct btf_type *t;
+	const struct btf_type *t, *rest;
 	const char *name;
 	__u16 kind;
 	__u32 id;
+	__u8 intenc;
+	int restypeid;

 	while (decls->cnt) {
 		id = decls->ids[--decls->cnt];
@@ -1295,8 +1328,51 @@  static void btf_dump_emit_type_chain(struct btf_dump *d,
 		t = btf__type_by_id(d->btf, id);
 		kind = btf_kind(t);

+		/* If we're asked to produce stdint declarations, we need
+		 * to only do that in the following cases:
+		 *  - int types other than char and _Bool
+		 *  - typedefs to int types (including char and _Bool) except
+		 *    kernel types like __s16/__u32/etc.
+		 *
+		 * If a typedef resolves to char or _Bool, we do want to use
+		 * the resolved type instead of the stdint types (i.e. char
+		 * instead of int8_t) because the stdint types are explicitly
+		 * signed/unsigned, which affects pointer casts.
+		 *
+		 * If the typedef is of the __s32 variety, we leave it as-is
+		 * due to incompatibilities in e.g. s64 vs int64_t definitions
+		 * (one is `long long` on x86_64 and the other is not).
+		 *
+		 * Unfortunately, the BTF type info never includes BTF_INT_CHAR,
+		 * so we use a size comparison to avoid chars and
+		 * BTF_INT_BOOL to avoid bools.
+		 */
+		if (d->sizedints && kind == BTF_KIND_TYPEDEF &&
+		    !btf_is_kernel_sizedint(d, t)) {
+			restypeid = btf__resolve_type(d->btf, id);
+			if (restypeid >= 0) {
+				rest = btf__type_by_id(d->btf, restypeid);
+				if (rest && btf_is_int(rest)) {
+					t = rest;
+					kind = btf_kind(rest);
+				}
+			}
+		}
+
 		switch (kind) {
 		case BTF_KIND_INT:
+			btf_dump_emit_mods(d, decls);
+			if (d->sizedints && !btf_is_bool(t) && !btf_is_char(d, t)) {
+				intenc = btf_int_encoding(t);
+				btf_dump_printf(d,
+						intenc & BTF_INT_SIGNED ?
+						"int%d_t" : "uint%d_t",
+						t->size * 8);
+			} else {
+				name = btf_name_of(d, t->name_off);
+				btf_dump_printf(d, "%s", name);
+			}
+			break;
 		case BTF_KIND_FLOAT:
 			btf_dump_emit_mods(d, decls);
 			name = btf_name_of(d, t->name_off);
@@ -1469,7 +1545,9 @@  static void btf_dump_emit_type_cast(struct btf_dump *d, __u32 id,

 	d->skip_anon_defs = true;
 	d->strip_mods = true;
+	d->sizedints = true;
 	btf_dump_emit_type_decl(d, id, "", 0);
+	d->sizedints = false;
 	d->strip_mods = false;
 	d->skip_anon_defs = false;