diff mbox series

[dwarves,2/2] btf_encoder: Normalize array index type for parallel dwarf loading case

Message ID 20220511220249.525908-1-yhs@fb.com (mailing list archive)
State Superseded
Headers show
Series [dwarves,1/2] libbpf: Sync with latest libbpf repo | expand

Checks

Context Check Description
netdev/tree_selection success Not a local patch

Commit Message

Yonghong Song May 11, 2022, 10:02 p.m. UTC
With the latest llvm15-built kernel (make -j LLVM=1), I hit the following
error when building selftests (make -C tools/testing/selftests/bpf -j LLVM=1):
  In file included from skeleton/pid_iter.bpf.c:3:
  .../selftests/bpf/tools/build/bpftool/vmlinux.h:84050:9: error: unknown type name
       '__builtin_va_list___2'; did you mean '__builtin_va_list'?
  typedef __builtin_va_list___2 va_list___2;
          ^~~~~~~~~~~~~~~~~~~~~
          __builtin_va_list
  note: '__builtin_va_list' declared here
  In file included from skeleton/profiler.bpf.c:3:
  .../selftests/bpf/tools/build/bpftool/vmlinux.h:84050:9: error: unknown type name
       '__builtin_va_list___2'; did you mean '__builtin_va_list'?
  typedef __builtin_va_list___2 va_list___2;
          ^~~~~~~~~~~~~~~~~~~~~
          __builtin_va_list
  note: '__builtin_va_list' declared here

The error can be easily explained with after-dedup vmlinux btf:
  [21] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED
  [2300] STRUCT '__va_list_tag' size=24 vlen=4
        'gp_offset' type_id=2 bits_offset=0
        'fp_offset' type_id=2 bits_offset=32
        'overflow_arg_area' type_id=32 bits_offset=64
        'reg_save_area' type_id=32 bits_offset=128
  [2308] TYPEDEF 'va_list' type_id=2309
  [2309] TYPEDEF '__builtin_va_list' type_id=2310
  [2310] ARRAY '(anon)' type_id=2300 index_type_id=21 nr_elems=1

  [5289] PTR '(anon)' type_id=2308
  [158520] STRUCT 'warn_args' size=32 vlen=2
        'fmt' type_id=14 bits_offset=0
        'args' type_id=2308 bits_offset=64
  [27299] INT '__ARRAY_SIZE_TYPE__' size=4 bits_offset=0 nr_bits=32 encoding=(none)
  [34590] TYPEDEF '__builtin_va_list' type_id=34591
  [34591] ARRAY '(anon)' type_id=2300 index_type_id=27299 nr_elems=1

The typedef __builtin_va_list is a builtin type for the compiler.
In the above case, two __builtin_va_list typedefs are generated
because their arrays have different index_type_id values. This happens
when pahole runs with more than one job while parsing DWARF
and generating BTF.

Function btf_encoder__encode_cu() is used to do btf encoding for
each cu. The function will try to find an "int" type for the cu
if it is available, otherwise, it will create a special type
with name __ARRAY_SIZE_TYPE__. For example,
  file1: yes 'int' type
  file2: no 'int' type

In serial mode, file1 is processed first, followed by file2.
Both will have 'int' type as the array index type since file2
will inherit the index type from file1.

In parallel mode though, arrays in file1 will have index type 'int',
and arrays in file2 will have index type '__ARRAY_SIZE_TYPE__'.
This prevents some legitimate dedup and may produce a
vmlinux.h that fails to compile.

This patch fixes the issue by normalizing every array index type
to the first array index type found in the whole BTF.

Signed-off-by: Yonghong Song <yhs@fb.com>
---
 btf_encoder.c | 24 +++++++++++++++++++++---
 btf_encoder.h |  2 +-
 pahole.c      |  2 +-
 3 files changed, 23 insertions(+), 5 deletions(-)

Comments

Andrii Nakryiko May 12, 2022, 12:32 a.m. UTC | #1
On Wed, May 11, 2022 at 3:02 PM Yonghong Song <yhs@fb.com> wrote:
>
> With latest llvm15 built kernel (make -j LLVM=1), I hit the following
> error when build selftests (make -C tools/testing/selftests/bpf -j LLVM=1):
>   In file included from skeleton/pid_iter.bpf.c:3:
>   .../selftests/bpf/tools/build/bpftool/vmlinux.h:84050:9: error: unknown type name
>        '__builtin_va_list___2'; did you mean '__builtin_va_list'?
>   typedef __builtin_va_list___2 va_list___2;
>           ^~~~~~~~~~~~~~~~~~~~~
>           __builtin_va_list
>   note: '__builtin_va_list' declared here
>   In file included from skeleton/profiler.bpf.c:3:
>   .../selftests/bpf/tools/build/bpftool/vmlinux.h:84050:9: error: unknown type name
>        '__builtin_va_list___2'; did you mean '__builtin_va_list'?
>   typedef __builtin_va_list___2 va_list___2;
>           ^~~~~~~~~~~~~~~~~~~~~
>           __builtin_va_list
>   note: '__builtin_va_list' declared here
>
> The error can be easily explained with after-dedup vmlinux btf:
>   [21] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED
>   [2300] STRUCT '__va_list_tag' size=24 vlen=4
>         'gp_offset' type_id=2 bits_offset=0
>         'fp_offset' type_id=2 bits_offset=32
>         'overflow_arg_area' type_id=32 bits_offset=64
>         'reg_save_area' type_id=32 bits_offset=128
>   [2308] TYPEDEF 'va_list' type_id=2309
>   [2309] TYPEDEF '__builtin_va_list' type_id=2310
>   [2310] ARRAY '(anon)' type_id=2300 index_type_id=21 nr_elems=1
>
>   [5289] PTR '(anon)' type_id=2308
>   [158520] STRUCT 'warn_args' size=32 vlen=2
>         'fmt' type_id=14 bits_offset=0
>         'args' type_id=2308 bits_offset=64
>   [27299] INT '__ARRAY_SIZE_TYPE__' size=4 bits_offset=0 nr_bits=32 encoding=(none)
>   [34590] TYPEDEF '__builtin_va_list' type_id=34591
>   [34591] ARRAY '(anon)' type_id=2300 index_type_id=27299 nr_elems=1
>
> The typedef __builtin_va_list is a builtin type for the compiler.
> In the above case, two typedef __builtin_va_list are generated.
> The reason is due to different array index_type_id. This happened
> when pahole is running with more than one jobs when parsing dwarf
> and generating btfs.
>
> Function btf_encoder__encode_cu() is used to do btf encoding for
> each cu. The function will try to find an "int" type for the cu
> if it is available, otherwise, it will create a special type
> with name __ARRAY_SIZE_TYPE__. For example,
>   file1: yes 'int' type
>   file2: no 'int' type
>
> In serial mode, file1 is processed first, followed by file2.
> both will have 'int' type as the array index type since file2
> will inherit the index type from file1.
>
> In parallel mode though, arrays in file1 will have index type 'int',
> and arrays in file2 wil have index type '__ARRAY_SIZE_TYPE__'.
> This will prevent some legitimate dedup and may have generated
> vmlinux.h having compilation error.
>

I think it is two separate problems.

1. Maybe instead of generating __ARRAY_SIZE_TYPE__ we should
generate a proper 'int' type?

2. __builtin_va_list___2 shouldn't have happened; it's a libbpf bug.
Libbpf handles __builtin_va_list specially (see
btf_dump_is_blacklisted()), so we need to fix libbpf to not get
confused if there are two __builtin_va_list copies in BTF.

> This patch fixed the issue by normalizing all array_index types
> to be the first array_index type in the whole btf.
>
> Signed-off-by: Yonghong Song <yhs@fb.com>
> ---
>  btf_encoder.c | 24 +++++++++++++++++++++---
>  btf_encoder.h |  2 +-
>  pahole.c      |  2 +-
>  3 files changed, 23 insertions(+), 5 deletions(-)
>

[...]
Yonghong Song May 12, 2022, 4:12 a.m. UTC | #2
On 5/11/22 5:32 PM, Andrii Nakryiko wrote:
> On Wed, May 11, 2022 at 3:02 PM Yonghong Song <yhs@fb.com> wrote:
>>
>> With latest llvm15 built kernel (make -j LLVM=1), I hit the following
>> error when build selftests (make -C tools/testing/selftests/bpf -j LLVM=1):
>>    In file included from skeleton/pid_iter.bpf.c:3:
>>    .../selftests/bpf/tools/build/bpftool/vmlinux.h:84050:9: error: unknown type name
>>         '__builtin_va_list___2'; did you mean '__builtin_va_list'?
>>    typedef __builtin_va_list___2 va_list___2;
>>            ^~~~~~~~~~~~~~~~~~~~~
>>            __builtin_va_list
>>    note: '__builtin_va_list' declared here
>>    In file included from skeleton/profiler.bpf.c:3:
>>    .../selftests/bpf/tools/build/bpftool/vmlinux.h:84050:9: error: unknown type name
>>         '__builtin_va_list___2'; did you mean '__builtin_va_list'?
>>    typedef __builtin_va_list___2 va_list___2;
>>            ^~~~~~~~~~~~~~~~~~~~~
>>            __builtin_va_list
>>    note: '__builtin_va_list' declared here
>>
>> The error can be easily explained with after-dedup vmlinux btf:
>>    [21] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED
>>    [2300] STRUCT '__va_list_tag' size=24 vlen=4
>>          'gp_offset' type_id=2 bits_offset=0
>>          'fp_offset' type_id=2 bits_offset=32
>>          'overflow_arg_area' type_id=32 bits_offset=64
>>          'reg_save_area' type_id=32 bits_offset=128
>>    [2308] TYPEDEF 'va_list' type_id=2309
>>    [2309] TYPEDEF '__builtin_va_list' type_id=2310
>>    [2310] ARRAY '(anon)' type_id=2300 index_type_id=21 nr_elems=1
>>
>>    [5289] PTR '(anon)' type_id=2308
>>    [158520] STRUCT 'warn_args' size=32 vlen=2
>>          'fmt' type_id=14 bits_offset=0
>>          'args' type_id=2308 bits_offset=64
>>    [27299] INT '__ARRAY_SIZE_TYPE__' size=4 bits_offset=0 nr_bits=32 encoding=(none)
>>    [34590] TYPEDEF '__builtin_va_list' type_id=34591
>>    [34591] ARRAY '(anon)' type_id=2300 index_type_id=27299 nr_elems=1
>>
>> The typedef __builtin_va_list is a builtin type for the compiler.
>> In the above case, two typedef __builtin_va_list are generated.
>> The reason is due to different array index_type_id. This happened
>> when pahole is running with more than one jobs when parsing dwarf
>> and generating btfs.
>>
>> Function btf_encoder__encode_cu() is used to do btf encoding for
>> each cu. The function will try to find an "int" type for the cu
>> if it is available, otherwise, it will create a special type
>> with name __ARRAY_SIZE_TYPE__. For example,
>>    file1: yes 'int' type
>>    file2: no 'int' type
>>
>> In serial mode, file1 is processed first, followed by file2.
>> both will have 'int' type as the array index type since file2
>> will inherit the index type from file1.
>>
>> In parallel mode though, arrays in file1 will have index type 'int',
>> and arrays in file2 wil have index type '__ARRAY_SIZE_TYPE__'.
>> This will prevent some legitimate dedup and may have generated
>> vmlinux.h having compilation error.
>>
> 
> I think it is two separate problems.
> 
> 1. Maybe instead of this generating __ARRAY_SIZE_TYPE__ we should
> generate proper 'int' type?

This should work. Will post v2 with this.

> 
> 2. __builtin_va_list___2 shouldn't have happened, it's libbpf bug.
> Libbpf handles __builtin_va_list specially (see
> btf_dump_is_blacklisted()), so we need to fix libbpf to not get
> confused if there are two __builtin_va_list copies in BTF.

I checked the code. libbpf avoids emitting
    typedef <...> __builtin_va_list
since __builtin_va_list is a builtin type.

Here, due to __ARRAY_SIZE_TYPE__ problem, the following are generated
in vmlinux.h.

typedef __builtin_va_list va_list;
typedef __builtin_va_list___2 va_list___2;

since __builtin_va_list appears twice in the BTF.
But because libbpf skips emitting
    typedef <...> __builtin_va_list

__builtin_va_list___2 is never defined, and this
causes the compilation error.

We could work around the issue in libbpf
so that, if the typedef has the form
   typedef __builtin_va_list<...> <other_type>
we just emit
   typedef __builtin_va_list <other_type>

But fixing the issue in pahole is much better since
we won't have va_list___2 any more.

> 
>> This patch fixed the issue by normalizing all array_index types
>> to be the first array_index type in the whole btf.
>>
>> Signed-off-by: Yonghong Song <yhs@fb.com>
>> ---
>>   btf_encoder.c | 24 +++++++++++++++++++++---
>>   btf_encoder.h |  2 +-
>>   pahole.c      |  2 +-
>>   3 files changed, 23 insertions(+), 5 deletions(-)
>>
> 
> [...]
Andrii Nakryiko May 12, 2022, 10:49 p.m. UTC | #3
On Wed, May 11, 2022 at 9:13 PM Yonghong Song <yhs@fb.com> wrote:
>
>
>
> On 5/11/22 5:32 PM, Andrii Nakryiko wrote:
> > On Wed, May 11, 2022 at 3:02 PM Yonghong Song <yhs@fb.com> wrote:
> >>
> >> With latest llvm15 built kernel (make -j LLVM=1), I hit the following
> >> error when build selftests (make -C tools/testing/selftests/bpf -j LLVM=1):
> >>    In file included from skeleton/pid_iter.bpf.c:3:
> >>    .../selftests/bpf/tools/build/bpftool/vmlinux.h:84050:9: error: unknown type name
> >>         '__builtin_va_list___2'; did you mean '__builtin_va_list'?
> >>    typedef __builtin_va_list___2 va_list___2;
> >>            ^~~~~~~~~~~~~~~~~~~~~
> >>            __builtin_va_list
> >>    note: '__builtin_va_list' declared here
> >>    In file included from skeleton/profiler.bpf.c:3:
> >>    .../selftests/bpf/tools/build/bpftool/vmlinux.h:84050:9: error: unknown type name
> >>         '__builtin_va_list___2'; did you mean '__builtin_va_list'?
> >>    typedef __builtin_va_list___2 va_list___2;
> >>            ^~~~~~~~~~~~~~~~~~~~~
> >>            __builtin_va_list
> >>    note: '__builtin_va_list' declared here
> >>
> >> The error can be easily explained with after-dedup vmlinux btf:
> >>    [21] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED
> >>    [2300] STRUCT '__va_list_tag' size=24 vlen=4
> >>          'gp_offset' type_id=2 bits_offset=0
> >>          'fp_offset' type_id=2 bits_offset=32
> >>          'overflow_arg_area' type_id=32 bits_offset=64
> >>          'reg_save_area' type_id=32 bits_offset=128
> >>    [2308] TYPEDEF 'va_list' type_id=2309
> >>    [2309] TYPEDEF '__builtin_va_list' type_id=2310
> >>    [2310] ARRAY '(anon)' type_id=2300 index_type_id=21 nr_elems=1
> >>
> >>    [5289] PTR '(anon)' type_id=2308
> >>    [158520] STRUCT 'warn_args' size=32 vlen=2
> >>          'fmt' type_id=14 bits_offset=0
> >>          'args' type_id=2308 bits_offset=64
> >>    [27299] INT '__ARRAY_SIZE_TYPE__' size=4 bits_offset=0 nr_bits=32 encoding=(none)
> >>    [34590] TYPEDEF '__builtin_va_list' type_id=34591
> >>    [34591] ARRAY '(anon)' type_id=2300 index_type_id=27299 nr_elems=1
> >>
> >> The typedef __builtin_va_list is a builtin type for the compiler.
> >> In the above case, two typedef __builtin_va_list are generated.
> >> The reason is due to different array index_type_id. This happened
> >> when pahole is running with more than one jobs when parsing dwarf
> >> and generating btfs.
> >>
> >> Function btf_encoder__encode_cu() is used to do btf encoding for
> >> each cu. The function will try to find an "int" type for the cu
> >> if it is available, otherwise, it will create a special type
> >> with name __ARRAY_SIZE_TYPE__. For example,
> >>    file1: yes 'int' type
> >>    file2: no 'int' type
> >>
> >> In serial mode, file1 is processed first, followed by file2.
> >> both will have 'int' type as the array index type since file2
> >> will inherit the index type from file1.
> >>
> >> In parallel mode though, arrays in file1 will have index type 'int',
> >> and arrays in file2 wil have index type '__ARRAY_SIZE_TYPE__'.
> >> This will prevent some legitimate dedup and may have generated
> >> vmlinux.h having compilation error.
> >>
> >
> > I think it is two separate problems.
> >
> > 1. Maybe instead of this generating __ARRAY_SIZE_TYPE__ we should
> > generate proper 'int' type?
>
> This should work. Will post v2 with this.
>
> >
> > 2. __builtin_va_list___2 shouldn't have happened, it's libbpf bug.
> > Libbpf handles __builtin_va_list specially (see
> > btf_dump_is_blacklisted()), so we need to fix libbpf to not get
> > confused if there are two __builtin_va_list copies in BTF.
>
> I checked code. the libbpf prevents generating
>     typedef <...> __builtin_va_list
> since __builtin_va_list is a builtin type.
>
> Here, due to __ARRAY_SIZE_TYPE__ problem, the following are generated
> in vmlinux.h.
>
> typedef __builtin_va_list va_list;
> typedef __builtin_va_list___2 va_list___2;
>
> since __builtin_va_list appears twice in the BTF.
> But due to the libbpf implementation to skip
>     typedef <...> __builtin_va_list
>
> We don't have __builtin_va_list___2 defined and this
> caused compilation error.
>
> Although we could workaround the issue in libbpf
> such that if the typedef is in the format of
>    typedef __builtin_va_list<...> <other_type>
> we should just emit
>    typedef __builtin_va_list <other_type>
>
> But fixing the issue in pahole is much better since
> we won't have va_list___2 any more.

Sounds good, let's do it in pahole, thanks!

>
> >
> >> This patch fixed the issue by normalizing all array_index types
> >> to be the first array_index type in the whole btf.
> >>
> >> Signed-off-by: Yonghong Song <yhs@fb.com>
> >> ---
> >>   btf_encoder.c | 24 +++++++++++++++++++++---
> >>   btf_encoder.h |  2 +-
> >>   pahole.c      |  2 +-
> >>   3 files changed, 23 insertions(+), 5 deletions(-)
> >>
> >
> > [...]
diff mbox series

Patch

diff --git a/btf_encoder.c b/btf_encoder.c
index 1a42094..6164a3d 100644
--- a/btf_encoder.c
+++ b/btf_encoder.c
@@ -1056,17 +1056,35 @@  out:
 	return err;
 }
 
-int btf_encoder__encode(struct btf_encoder *encoder)
+int btf_encoder__encode(struct btf_encoder *encoder, bool normalize_array_index_tid)
 {
-	int err;
+	int i, err, nr_types, index_type_id = 0;
 
 	if (gobuffer__size(&encoder->percpu_secinfo) != 0)
 		btf_encoder__add_datasec(encoder, PERCPU_SECTION);
 
 	/* Empty file, nothing to do, so... done! */
-	if (btf__type_cnt(encoder->btf) == 1)
+	nr_types = btf__type_cnt(encoder->btf);
+	if (nr_types == 1)
 		return 0;
 
+	if (normalize_array_index_tid) {
+		for (i = 1; i < nr_types; i++) {
+			/* remove the 'const' qualifier so the index_type can be changed. */
+			struct btf_type *t = (struct btf_type *)btf__type_by_id(encoder->btf, i);
+			struct btf_array *arr_info;
+
+			if (!btf_is_array(t))
+				continue;
+
+			arr_info = btf_array(t);
+			if (index_type_id == 0)
+				index_type_id = arr_info->index_type;
+			else
+				arr_info->index_type = index_type_id;
+		}
+	}
+
 	if (btf__dedup(encoder->btf, NULL)) {
 		fprintf(stderr, "%s: btf__dedup failed!\n", __func__);
 		return -1;
diff --git a/btf_encoder.h b/btf_encoder.h
index 339fae2..9a4c79e 100644
--- a/btf_encoder.h
+++ b/btf_encoder.h
@@ -19,7 +19,7 @@  struct list_head;
 struct btf_encoder *btf_encoder__new(struct cu *cu, const char *detached_filename, struct btf *base_btf, bool skip_encoding_vars, bool force, bool gen_floats, bool verbose);
 void btf_encoder__delete(struct btf_encoder *encoder);
 
-int btf_encoder__encode(struct btf_encoder *encoder);
+int btf_encoder__encode(struct btf_encoder *encoder, bool normalize_array_index_tid);
 
 int btf_encoder__encode_cu(struct btf_encoder *encoder, struct cu *cu);
 
diff --git a/pahole.c b/pahole.c
index 78caa08..2c3b2ac 100644
--- a/pahole.c
+++ b/pahole.c
@@ -3530,7 +3530,7 @@  try_sole_arg_as_class_names:
 	header = NULL;
 
 	if (btf_encode && btf_encoder) { // maybe all CUs were filtered out and thus we don't have an encoder?
-		err = btf_encoder__encode(btf_encoder);
+		err = btf_encoder__encode(btf_encoder, conf_load.nr_jobs > 1);
 		if (err) {
 			fputs("Failed to encode BTF\n", stderr);
 			goto out_cus_delete;