diff mbox series

[V8,1/4] Add dso__is_pie call to identify ELF PIE

Message ID 18bb5281c3648a67bff61ce44966d3dfd7c09b4f.1725667397.git.scclevenger@os.amperecomputing.com (mailing list archive)
State New, archived
Headers show
Series arm-cs-trace-disasm.py/perf must accommodate non-zero DSO text offset | expand

Commit Message

Steve Clevenger Sept. 9, 2024, 9:30 p.m. UTC
Add dso__is_pie global to read the .dynamic section DT_FLAGS_1 entry for
the DF_1_PIE flag. This identifies position executable code.

Signed-off-by: Steve Clevenger <scclevenger@os.amperecomputing.com>
Reviewed-by: Leo Yan <leo.yan@arm.com>
---
 tools/perf/util/symbol-elf.c | 61 ++++++++++++++++++++++++++++++++++++
 tools/perf/util/symbol.h     |  1 +
 2 files changed, 62 insertions(+)

Comments

Leo Yan Oct. 9, 2024, 9:13 p.m. UTC | #1
On 9/9/2024 10:30 PM, Steve Clevenger wrote:
> 
> Add dso__is_pie global to read the .dynamic section DT_FLAGS_1 entry for
> the DF_1_PIE flag. This identifies position executable code.
> 
> Signed-off-by: Steve Clevenger <scclevenger@os.amperecomputing.com>
> Reviewed-by: Leo Yan <leo.yan@arm.com>
> ---
>  tools/perf/util/symbol-elf.c | 61 ++++++++++++++++++++++++++++++++++++
>  tools/perf/util/symbol.h     |  1 +
>  2 files changed, 62 insertions(+)
> 
> diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
> index e398abfd13a0..babe47976922 100644
> --- a/tools/perf/util/symbol-elf.c
> +++ b/tools/perf/util/symbol-elf.c
> @@ -662,6 +662,67 @@ static int dso__synthesize_plt_got_symbols(struct dso *dso, Elf *elf,
>         return err;
>  }
> 
> +/*
> + * Check dynamic section DT_FLAGS_1 for a Position Independent
> + * Executable (PIE).
> + */
> +bool dso__is_pie(struct dso *dso)
> +{
> +       Elf *elf = NULL;
> +       Elf_Scn *scn = NULL;
> +       GElf_Ehdr ehdr;
> +       GElf_Shdr shdr;
> +       bool is_pie = false;
> +       char dso_path[PATH_MAX];
> +       int fd = -1;
> +
> +       if (!dso || (elf_version(EV_CURRENT) == EV_NONE))
> +               goto exit;      // false
> +
> +       dso__build_id_filename(dso, dso_path, sizeof(dso_path), false);
> +
> +       fd = open(dso_path, O_RDONLY);
> +
> +       if (fd < 0) {
> +               pr_debug("%s: cannot read cached %s.\n", __func__, dso_path);
> +               goto exit;      // false
> +       }
> +
> +       elf = elf_begin(fd, ELF_C_READ, NULL);
> +       gelf_getehdr(elf, &ehdr);
> +
> +       if (ehdr.e_type == ET_DYN) {
> +               Elf_Data *data;
> +               GElf_Dyn *entry;
> +               int n_entries = shdr.sh_size / sizeof(GElf_Dyn);

I took time to play this series on Arm64 machine and found issue in above
sentence. As the 'shdr' strucutre is read out from elf_section_by_name()
below, but it calculates the entries before reading out the section header.
Therefore, I observed that program will not be detected as PIE executable due
to 'n_entries' is 0.

With fixing this bug, then I observed the regression caused by patch 02.

Below are steps:

- Build test program with below command:

  # gcc -pie -Wl,-z,relro,-z,now -o test test.c
  # readelf readelf -a test | grep FLAGS
  0x000000000000001e (FLAGS)              BIND_NOW
  0x000000006ffffffb (FLAGS_1)            Flags: NOW PIE

  The test program is a simple infinite loop. I run this program in a docker
  container with Fedora 40.

- Record trace data:

  # perf record -e cycles -p 149531
  # perf script

            test  149531 483168.078485:       4986 cycles:      aaaacde401b4
[unknown] (/home/test)
            test  149531 483168.078564:     134207 cycles:      aaaacde401b4
[unknown] (/home/test)
            test  149531 483168.079305:    1257097 cycles:      aaaacde401b4
[unknown] (/home/test)

  You can see it fails to parse symbol and print out [unknown].

  After I reverted patch 02 in this series, then:

  # perf script
            test  149531 483168.078485:       4986 cycles:      aaaacde401b4
main+0xc (/home/test)
            test  149531 483168.078564:     134207 cycles:      aaaacde401b4
main+0xc (/home/test)
            test  149531 483168.079305:    1257097 cycles:      aaaacde401b4
main+0xc (/home/test)

Not sure if I miss anything for PIE executable, seems to me, we should drop
the first two patches and just pass pg_off to python script?

Thanks,
Leo

> +
> +               scn = elf_section_by_name(elf, &ehdr, &shdr, ".dynamic", NULL);
> +               if (!scn)
> +                       goto exit_close;        // false
> +
> +               data = (Elf_Data *) elf_getdata(scn, NULL);
> +               if (!data || !data->d_buf)
> +                       goto exit_close;        // false
> +
> +               // check DT_FLAGS_1
> +               for (int i = 0; i < n_entries; i++) {
> +                       entry = ((GElf_Dyn *) data->d_buf) + i;
> +                       if (entry->d_tag == DT_FLAGS_1) {
> +                               if ((entry->d_un.d_val & DF_1_PIE) != 0) {
> +                                       is_pie = true;
> +                                       break;
> +                               }
> +                       }
> +               } // end for
> +       }
> +
> +exit_close:
> +       elf_end(elf);
> +       close(fd);
> +exit:
> +       return is_pie;
> +}
> +
>  /*
>   * We need to check if we have a .dynsym, so that we can handle the
>   * .plt, synthesizing its symbols, that aren't on the symtabs (be it
> diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
> index 3fb5d146d9b1..33ea2596ce31 100644
> --- a/tools/perf/util/symbol.h
> +++ b/tools/perf/util/symbol.h
> @@ -127,6 +127,7 @@ void dso__insert_symbol(struct dso *dso,
>                         struct symbol *sym);
>  void dso__delete_symbol(struct dso *dso,
>                         struct symbol *sym);
> +bool dso__is_pie(struct dso *dso);
> 
>  struct symbol *dso__find_symbol(struct dso *dso, u64 addr);
>  struct symbol *dso__find_symbol_nocache(struct dso *dso, u64 addr);
> --
> 2.44.0
>
Steve Clevenger Oct. 10, 2024, 4:30 p.m. UTC | #2
On 10/9/2024 2:13 PM, Leo Yan wrote:
> On 9/9/2024 10:30 PM, Steve Clevenger wrote:
>>
>> Add dso__is_pie global to read the .dynamic section DT_FLAGS_1 entry for
>> the DF_1_PIE flag. This identifies position executable code.
>>
>> Signed-off-by: Steve Clevenger <scclevenger@os.amperecomputing.com>
>> Reviewed-by: Leo Yan <leo.yan@arm.com>
>> ---
>>  tools/perf/util/symbol-elf.c | 61 ++++++++++++++++++++++++++++++++++++
>>  tools/perf/util/symbol.h     |  1 +
>>  2 files changed, 62 insertions(+)
>>
>> diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
>> index e398abfd13a0..babe47976922 100644
>> --- a/tools/perf/util/symbol-elf.c
>> +++ b/tools/perf/util/symbol-elf.c
>> @@ -662,6 +662,67 @@ static int dso__synthesize_plt_got_symbols(struct dso *dso, Elf *elf,
>>         return err;
>>  }
>>
>> +/*
>> + * Check dynamic section DT_FLAGS_1 for a Position Independent
>> + * Executable (PIE).
>> + */
>> +bool dso__is_pie(struct dso *dso)
>> +{
>> +       Elf *elf = NULL;
>> +       Elf_Scn *scn = NULL;
>> +       GElf_Ehdr ehdr;
>> +       GElf_Shdr shdr;
>> +       bool is_pie = false;
>> +       char dso_path[PATH_MAX];
>> +       int fd = -1;
>> +
>> +       if (!dso || (elf_version(EV_CURRENT) == EV_NONE))
>> +               goto exit;      // false
>> +
>> +       dso__build_id_filename(dso, dso_path, sizeof(dso_path), false);
>> +
>> +       fd = open(dso_path, O_RDONLY);
>> +
>> +       if (fd < 0) {
>> +               pr_debug("%s: cannot read cached %s.\n", __func__, dso_path);
>> +               goto exit;      // false
>> +       }
>> +
>> +       elf = elf_begin(fd, ELF_C_READ, NULL);
>> +       gelf_getehdr(elf, &ehdr);
>> +
>> +       if (ehdr.e_type == ET_DYN) {
>> +               Elf_Data *data;
>> +               GElf_Dyn *entry;
>> +               int n_entries = shdr.sh_size / sizeof(GElf_Dyn);
> 
> I took time to play this series on Arm64 machine and found issue in above
> sentence. As the 'shdr' strucutre is read out from elf_section_by_name()
> below, but it calculates the entries before reading out the section header.
> Therefore, I observed that program will not be detected as PIE executable due
> to 'n_entries' is 0.
> 
> With fixing this bug, then I observed the regression caused by patch 02.
> 
> Below are steps:
> 
> - Build test program with below command:
> 
>   # gcc -pie -Wl,-z,relro,-z,now -o test test.c
>   # readelf readelf -a test | grep FLAGS
>   0x000000000000001e (FLAGS)              BIND_NOW
>   0x000000006ffffffb (FLAGS_1)            Flags: NOW PIE
> 
>   The test program is a simple infinite loop. I run this program in a docker
>   container with Fedora 40.
> 
> - Record trace data:
> 
>   # perf record -e cycles -p 149531
>   # perf script
> 
>             test  149531 483168.078485:       4986 cycles:      aaaacde401b4
> [unknown] (/home/test)
>             test  149531 483168.078564:     134207 cycles:      aaaacde401b4
> [unknown] (/home/test)
>             test  149531 483168.079305:    1257097 cycles:      aaaacde401b4
> [unknown] (/home/test)
> 
>   You can see it fails to parse symbol and print out [unknown].
> 
>   After I reverted patch 02 in this series, then:
> 
>   # perf script
>             test  149531 483168.078485:       4986 cycles:      aaaacde401b4
> main+0xc (/home/test)
>             test  149531 483168.078564:     134207 cycles:      aaaacde401b4
> main+0xc (/home/test)
>             test  149531 483168.079305:    1257097 cycles:      aaaacde401b4
> main+0xc (/home/test)
> 
> Not sure if I miss anything for PIE executable, seems to me, we should drop
> the first two patches and just pass pg_off to python script?
> > Thanks,
> Leo

Hi Leo,

I verified the perf/arm-cs-trace-disasm.py script patch results for
CoreSight user and kernel trace using Fedora distros 37 and higher. I
did little to test non-CoreSight trace, but I do see the dso__is_pie
initialization error, and the problem revealed by your test program. As
a side effect, the problem would contribute to the CoreSight user
instruction test passes.

The arm-cs-trace-disasm.py python script has a legacy IF block to force
'dso_vm_start' to zero for kernel code. Similarly, map_pgoff gets forced
to zero here. See the adjacent comment pertaining to memory offsets for
kernel dso and executable file dso. This, I think, is the root of the
misunderstanding with regard to the PIE check. I see the python IF
comparison includes a check for dso_start == 0x400000 but I see no
information as to why this hardcoded value is relevant. Do you know?

perf has (or now has) the information (e.g. map__rip_2objdump,
map__objdump_2mem) needed to make all objdump address adjustments prior
to calling the script. The script could even be made redundant as
suggested in an earlier thread. In the meantime, Ampere needs this
working so the patch implementation has to work with the current
perf/script implementation.

I agree the revised approach is to pass pgoff to the updated script
unmodified.This means patch 3 will pass map_pgoff unmodified by
MAPPING_TYPE into the dictionary. Patch 1-2 goes away, and patch 4
remains as-is.

Steve

> 
>> +
>> +               scn = elf_section_by_name(elf, &ehdr, &shdr, ".dynamic", NULL);
>> +               if (!scn)
>> +                       goto exit_close;        // false
>> +
>> +               data = (Elf_Data *) elf_getdata(scn, NULL);
>> +               if (!data || !data->d_buf)
>> +                       goto exit_close;        // false
>> +
>> +               // check DT_FLAGS_1
>> +               for (int i = 0; i < n_entries; i++) {
>> +                       entry = ((GElf_Dyn *) data->d_buf) + i;
>> +                       if (entry->d_tag == DT_FLAGS_1) {
>> +                               if ((entry->d_un.d_val & DF_1_PIE) != 0) {
>> +                                       is_pie = true;
>> +                                       break;
>> +                               }
>> +                       }
>> +               } // end for
>> +       }
>> +
>> +exit_close:
>> +       elf_end(elf);
>> +       close(fd);
>> +exit:
>> +       return is_pie;
>> +}
>> +
>>  /*
>>   * We need to check if we have a .dynsym, so that we can handle the
>>   * .plt, synthesizing its symbols, that aren't on the symtabs (be it
>> diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
>> index 3fb5d146d9b1..33ea2596ce31 100644
>> --- a/tools/perf/util/symbol.h
>> +++ b/tools/perf/util/symbol.h
>> @@ -127,6 +127,7 @@ void dso__insert_symbol(struct dso *dso,
>>                         struct symbol *sym);
>>  void dso__delete_symbol(struct dso *dso,
>>                         struct symbol *sym);
>> +bool dso__is_pie(struct dso *dso);
>>
>>  struct symbol *dso__find_symbol(struct dso *dso, u64 addr);
>>  struct symbol *dso__find_symbol_nocache(struct dso *dso, u64 addr);
>> --
>> 2.44.0
>>
diff mbox series

Patch

diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index e398abfd13a0..babe47976922 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -662,6 +662,67 @@  static int dso__synthesize_plt_got_symbols(struct dso *dso, Elf *elf,
 	return err;
 }
 
+/*
+ * Check dynamic section DT_FLAGS_1 for a Position Independent
+ * Executable (PIE).
+ */
+bool dso__is_pie(struct dso *dso)
+{
+	Elf *elf = NULL;
+	Elf_Scn *scn = NULL;
+	GElf_Ehdr ehdr;
+	GElf_Shdr shdr;
+	bool is_pie = false;
+	char dso_path[PATH_MAX];
+	int fd = -1;
+
+	if (!dso || (elf_version(EV_CURRENT) == EV_NONE))
+		goto exit;	// false
+
+	dso__build_id_filename(dso, dso_path, sizeof(dso_path), false);
+
+	fd = open(dso_path, O_RDONLY);
+
+	if (fd < 0) {
+		pr_debug("%s: cannot read cached %s.\n", __func__, dso_path);
+		goto exit;	// false
+	}
+
+	elf = elf_begin(fd, ELF_C_READ, NULL);
+	gelf_getehdr(elf, &ehdr);
+
+	if (ehdr.e_type == ET_DYN) {
+		Elf_Data *data;
+		GElf_Dyn *entry;
+		int n_entries = shdr.sh_size / sizeof(GElf_Dyn);
+
+		scn = elf_section_by_name(elf, &ehdr, &shdr, ".dynamic", NULL);
+		if (!scn)
+			goto exit_close;	// false
+
+		data = (Elf_Data *) elf_getdata(scn, NULL);
+		if (!data || !data->d_buf)
+			goto exit_close;	// false
+
+		// check DT_FLAGS_1
+		for (int i = 0; i < n_entries; i++) {
+			entry = ((GElf_Dyn *) data->d_buf) + i;
+			if (entry->d_tag == DT_FLAGS_1) {
+				if ((entry->d_un.d_val & DF_1_PIE) != 0) {
+					is_pie = true;
+					break;
+				}
+			}
+		} // end for
+	}
+
+exit_close:
+	elf_end(elf);
+	close(fd);
+exit:
+	return is_pie;
+}
+
 /*
  * We need to check if we have a .dynsym, so that we can handle the
  * .plt, synthesizing its symbols, that aren't on the symtabs (be it
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 3fb5d146d9b1..33ea2596ce31 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -127,6 +127,7 @@  void dso__insert_symbol(struct dso *dso,
 			struct symbol *sym);
 void dso__delete_symbol(struct dso *dso,
 			struct symbol *sym);
+bool dso__is_pie(struct dso *dso);
 
 struct symbol *dso__find_symbol(struct dso *dso, u64 addr);
 struct symbol *dso__find_symbol_nocache(struct dso *dso, u64 addr);