diff mbox series

[v2,02/19] gendwarfksyms: Add symbol list handling

Message ID 20240815173903.4172139-23-samitolvanen@google.com (mailing list archive)
State New
Headers show
Series Implement DWARF modversions | expand

Commit Message

Sami Tolvanen Aug. 15, 2024, 5:39 p.m. UTC
Add support for passing a list of exported symbols to gendwarfksyms
via stdin and filter out non-exported symbols from the output.

Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
---
 scripts/gendwarfksyms/Makefile        |  1 +
 scripts/gendwarfksyms/dwarf.c         | 53 ++++++++++++++-
 scripts/gendwarfksyms/gendwarfksyms.c |  4 +-
 scripts/gendwarfksyms/gendwarfksyms.h | 21 ++++++
 scripts/gendwarfksyms/symbols.c       | 96 +++++++++++++++++++++++++++
 5 files changed, 171 insertions(+), 4 deletions(-)
 create mode 100644 scripts/gendwarfksyms/symbols.c

Comments

Petr Pavlu Aug. 27, 2024, 9:16 a.m. UTC | #1
On 8/15/24 19:39, Sami Tolvanen wrote:
> Add support for passing a list of exported symbols to gendwarfksyms
> via stdin and filter out non-exported symbols from the output.
> 
> Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
> ---
>  scripts/gendwarfksyms/Makefile        |  1 +
>  scripts/gendwarfksyms/dwarf.c         | 53 ++++++++++++++-
>  scripts/gendwarfksyms/gendwarfksyms.c |  4 +-
>  scripts/gendwarfksyms/gendwarfksyms.h | 21 ++++++
>  scripts/gendwarfksyms/symbols.c       | 96 +++++++++++++++++++++++++++
>  5 files changed, 171 insertions(+), 4 deletions(-)
>  create mode 100644 scripts/gendwarfksyms/symbols.c
> 
> diff --git a/scripts/gendwarfksyms/Makefile b/scripts/gendwarfksyms/Makefile
> index c1389c161f9c..623f8fc975ea 100644
> --- a/scripts/gendwarfksyms/Makefile
> +++ b/scripts/gendwarfksyms/Makefile
> @@ -2,6 +2,7 @@ hostprogs-always-y += gendwarfksyms
>  
>  gendwarfksyms-objs += gendwarfksyms.o
>  gendwarfksyms-objs += dwarf.o
> +gendwarfksyms-objs += symbols.o
>  
>  HOST_EXTRACFLAGS := -I $(srctree)/tools/include
>  HOSTLDLIBS_gendwarfksyms := -ldw -lelf
> diff --git a/scripts/gendwarfksyms/dwarf.c b/scripts/gendwarfksyms/dwarf.c
> index 65a29d0bd8f4..71cfab0553da 100644
> --- a/scripts/gendwarfksyms/dwarf.c
> +++ b/scripts/gendwarfksyms/dwarf.c
> @@ -5,6 +5,48 @@
>  
>  #include "gendwarfksyms.h"
>  
> +static bool get_ref_die_attr(Dwarf_Die *die, unsigned int id, Dwarf_Die *value)
> +{
> +	Dwarf_Attribute da;
> +
> +	/* dwarf_formref_die returns a pointer instead of an error value. */
> +	return dwarf_attr(die, id, &da) && dwarf_formref_die(&da, value);
> +}
> +
> +static const char *get_name(Dwarf_Die *die)
> +{
> +	Dwarf_Attribute attr;
> +
> +	/* rustc uses DW_AT_linkage_name for exported symbols */
> +	if (dwarf_attr(die, DW_AT_linkage_name, &attr) ||
> +	    dwarf_attr(die, DW_AT_name, &attr)) {
> +		return dwarf_formstring(&attr);
> +	}
> +
> +	return NULL;
> +}
> +
> +static bool is_export_symbol(struct state *state, Dwarf_Die *die)
> +{
> +	Dwarf_Die *source = die;
> +	Dwarf_Die origin;
> +
> +	state->sym = NULL;

Nit: This assignment isn't strictly necessary, the value is overwritten
a few lines below and isn't used in between.

> +
> +	/* If the DIE has an abstract origin, use it for type information. */
> +	if (get_ref_die_attr(die, DW_AT_abstract_origin, &origin))
> +		source = &origin;
> +
> +	state->sym = symbol_get(get_name(die));
> +
> +	/* Look up using the origin name if there are no matches. */
> +	if (!state->sym && source != die)
> +		state->sym = symbol_get(get_name(source));
> +
> +	state->die = *source;
> +	return !!state->sym;
> +}
> +
>  /*
>   * Type string processing
>   */
> @@ -40,7 +82,7 @@ int process_die_container(struct state *state, Dwarf_Die *die,
>  }
>  
>  /*
> - * Symbol processing
> + * Exported symbol processing
>   */
>  static int process_subprogram(struct state *state, Dwarf_Die *die)
>  {
> @@ -67,10 +109,15 @@ static int process_exported_symbols(struct state *state, Dwarf_Die *die)
>  	/* Possible exported symbols */
>  	case DW_TAG_subprogram:
>  	case DW_TAG_variable:
> +		if (!is_export_symbol(state, die))
> +			return 0;
> +
> +		debug("%s", state->sym->name);
> +
>  		if (tag == DW_TAG_subprogram)
> -			check(process_subprogram(state, die));
> +			check(process_subprogram(state, &state->die));
>  		else
> -			check(process_variable(state, die));
> +			check(process_variable(state, &state->die));
>  
>  		return 0;
>  	default:
> diff --git a/scripts/gendwarfksyms/gendwarfksyms.c b/scripts/gendwarfksyms/gendwarfksyms.c
> index 27f2d6423c45..d209b237766b 100644
> --- a/scripts/gendwarfksyms/gendwarfksyms.c
> +++ b/scripts/gendwarfksyms/gendwarfksyms.c
> @@ -27,7 +27,7 @@ static const struct {
>  
>  static int usage(void)
>  {
> -	error("usage: gendwarfksyms [options] elf-object-file ...");
> +	error("usage: gendwarfksyms [options] elf-object-file ... < symbol-list");
>  	return -1;
>  }
>  
> @@ -105,6 +105,8 @@ int main(int argc, const char **argv)
>  	if (parse_options(argc, argv) < 0)
>  		return usage();
>  
> +	check(symbol_read_exports(stdin));
> +
>  	for (n = 0; n < object_count; n++) {
>  		Dwfl *dwfl;
>  		int fd;
> diff --git a/scripts/gendwarfksyms/gendwarfksyms.h b/scripts/gendwarfksyms/gendwarfksyms.h
> index 5ab7ce7d4efb..03f3e408a839 100644
> --- a/scripts/gendwarfksyms/gendwarfksyms.h
> +++ b/scripts/gendwarfksyms/gendwarfksyms.h
> @@ -7,9 +7,11 @@
>  #include <elfutils/libdw.h>
>  #include <elfutils/libdwfl.h>
>  #include <linux/hashtable.h>
> +#include <linux/jhash.h>
>  #include <inttypes.h>
>  #include <stdlib.h>
>  #include <stdio.h>
> +#include <string.h>
>  
>  #ifndef __GENDWARFKSYMS_H
>  #define __GENDWARFKSYMS_H
> @@ -56,6 +58,23 @@ extern bool debug;
>  /* Error == negative values */
>  #define checkp(expr) __check(expr, __res < 0, __res)
>  
> +/*
> + * symbols.c
> + */
> +
> +static inline u32 name_hash(const char *name)
> +{
> +	return jhash(name, strlen(name), 0);
> +}
> +
> +struct symbol {
> +	const char *name;
> +	struct hlist_node name_hash;
> +};
> +
> +extern int symbol_read_exports(FILE *file);
> +extern struct symbol *symbol_get(const char *name);

Nit: extern isn't necessary here and in other similar cases throughout
the series. It should be removed per
Documentation/process/coding-style.rst, 6.1) Function prototypes.

> +
>  /*
>   * dwarf.c
>   */
> @@ -63,6 +82,8 @@ extern bool debug;
>  struct state {
>  	Dwfl_Module *mod;
>  	Dwarf *dbg;
> +	struct symbol *sym;
> +	Dwarf_Die die;
>  };
>  
>  typedef int (*die_callback_t)(struct state *state, Dwarf_Die *die);
> diff --git a/scripts/gendwarfksyms/symbols.c b/scripts/gendwarfksyms/symbols.c
> new file mode 100644
> index 000000000000..673ad9cf9e77
> --- /dev/null
> +++ b/scripts/gendwarfksyms/symbols.c
> @@ -0,0 +1,96 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +/*
> + * Copyright (C) 2024 Google LLC
> + */
> +
> +#include "gendwarfksyms.h"
> +
> +#define SYMBOL_HASH_BITS 15
> +static DEFINE_HASHTABLE(symbol_names, SYMBOL_HASH_BITS);
> +
> +typedef int (*symbol_callback_t)(struct symbol *, void *arg);
> +
> +static int for_each(const char *name, symbol_callback_t func, void *data)
> +{
> +	struct hlist_node *tmp;
> +	struct symbol *match;
> +
> +	if (!name || !*name)
> +		return 0;
> +
> +	hash_for_each_possible_safe(symbol_names, match, tmp, name_hash,
> +				    name_hash(name)) {
> +		if (strcmp(match->name, name))
> +			continue;
> +
> +		if (func)
> +			check(func(match, data));
> +
> +		return 1;
> +	}
> +
> +	return 0;
> +}
> +
> +static bool is_exported(const char *name)
> +{
> +	return checkp(for_each(name, NULL, NULL)) > 0;
> +}
> +
> +int symbol_read_exports(FILE *file)
> +{
> +	struct symbol *sym;
> +	char *line = NULL;
> +	char *name = NULL;
> +	size_t size = 0;
> +	int nsym = 0;
> +
> +	while (getline(&line, &size, file) > 0) {
> +		if (sscanf(line, "%ms\n", &name) != 1) {
> +			error("malformed input line: %s", line);
> +			return -1;
> +		}
> +
> +		free(line);
> +		line = NULL;
> +
> +		if (is_exported(name))
> +			continue; /* Ignore duplicates */
> +
> +		sym = malloc(sizeof(struct symbol));
> +		if (!sym) {
> +			error("malloc failed");
> +			return -1;
> +		}
> +
> +		sym->name = name;
> +		name = NULL;
> +
> +		hash_add(symbol_names, &sym->name_hash, name_hash(sym->name));
> +		++nsym;
> +
> +		debug("%s", sym->name);
> +	}
> +
> +	if (line)
> +		free(line);

The loop leaks line on a potential sscanf() error and name if the symbol
is a duplicate or malloc(sizeof(struct symbol)) fails. Additionally, it
should be possible to avoid allocating line by getline() on each
iteration.

I would change it to something like this (not tested):

int symbol_read_exports(FILE *file)
{
	struct symbol *sym;
	char *line = NULL;
	char *name = NULL;
	size_t size = 0;
	int nsym = 0;
	int ret = -1;

	while (getline(&line, &size, file) > 0) {
		if (sscanf(line, "%ms\n", &name) != 1) {
			error("malformed input line: %s", line);
			goto out;
		}

		if (is_exported(name)) {
			/* Ignore duplicates */
			free(name);
			name = NULL;
			continue;
		}

		sym = malloc(sizeof(struct symbol));
		if (!sym) {
			error("malloc failed");
			goto out;
		}

		sym->name = name;
		name = NULL;

		hash_add(symbol_names, &sym->name_hash, name_hash(sym->name));
		++nsym;

		debug("%s", sym->name);
	}

	debug("%d exported symbols", nsym);
	ret = 0;

out:
	free(line);
	free(name);
	return ret;
}

> +
> +	debug("%d exported symbols", nsym);
> +	return 0;
> +}
> +
> +static int get_symbol(struct symbol *sym, void *arg)
> +{
> +	struct symbol **res = arg;
> +
> +	*res = sym;
> +	return 0;
> +}
> +
> +struct symbol *symbol_get(const char *name)
> +{
> +	struct symbol *sym = NULL;
> +
> +	for_each(name, get_symbol, &sym);
> +	return sym;
> +}
Sami Tolvanen Aug. 27, 2024, 6:47 p.m. UTC | #2
Hi Petr,

On Tue, Aug 27, 2024 at 2:16 AM Petr Pavlu <petr.pavlu@suse.com> wrote:
>
> On 8/15/24 19:39, Sami Tolvanen wrote:
> > +static bool is_export_symbol(struct state *state, Dwarf_Die *die)
> > +{
> > +     Dwarf_Die *source = die;
> > +     Dwarf_Die origin;
> > +
> > +     state->sym = NULL;
>
> Nit: This assignment isn't strictly necessary, the value is overwritten
> a few lines below and isn't used in between.

True, I think this was left over from refactoring.

> > +int symbol_read_exports(FILE *file)
> > +{
> > +     struct symbol *sym;
> > +     char *line = NULL;
> > +     char *name = NULL;
> > +     size_t size = 0;
> > +     int nsym = 0;
> > +
> > +     while (getline(&line, &size, file) > 0) {
> > +             if (sscanf(line, "%ms\n", &name) != 1) {
> > +                     error("malformed input line: %s", line);
> > +                     return -1;
> > +             }
> > +
> > +             free(line);
> > +             line = NULL;
> > +
> > +             if (is_exported(name))
> > +                     continue; /* Ignore duplicates */
> > +
> > +             sym = malloc(sizeof(struct symbol));
> > +             if (!sym) {
> > +                     error("malloc failed");
> > +                     return -1;
> > +             }
> > +
> > +             sym->name = name;
> > +             name = NULL;
> > +
> > +             hash_add(symbol_names, &sym->name_hash, name_hash(sym->name));
> > +             ++nsym;
> > +
> > +             debug("%s", sym->name);
> > +     }
> > +
> > +     if (line)
> > +             free(line);
>
> The loop leaks line on a potential sscanf() error and name if the symbol
> is a duplicate or malloc(sizeof(struct symbol)) fails. Additionally, it
> should be possible to avoid allocating line by getline() on each
> iteration.
>
> I would change it to something like this (not tested):

Good points, I'll change this to your suggested version (after testing). Thanks!

Sami
Petr Pavlu Aug. 28, 2024, 12:35 p.m. UTC | #3
On 8/15/24 19:39, Sami Tolvanen wrote:
> diff --git a/scripts/gendwarfksyms/dwarf.c b/scripts/gendwarfksyms/dwarf.c
> index 65a29d0bd8f4..71cfab0553da 100644
> --- a/scripts/gendwarfksyms/dwarf.c
> +++ b/scripts/gendwarfksyms/dwarf.c
> @@ -5,6 +5,48 @@
> [...]
> +
> +static bool is_export_symbol(struct state *state, Dwarf_Die *die)
> +{
> +	Dwarf_Die *source = die;
> +	Dwarf_Die origin;
> +
> +	state->sym = NULL;
> +
> +	/* If the DIE has an abstract origin, use it for type information. */
> +	if (get_ref_die_attr(die, DW_AT_abstract_origin, &origin))
> +		source = &origin;
> +
> +	state->sym = symbol_get(get_name(die));
> +
> +	/* Look up using the origin name if there are no matches. */
> +	if (!state->sym && source != die)
> +		state->sym = symbol_get(get_name(source));
> +
> +	state->die = *source;
> +	return !!state->sym;
> +}

Sorry, I don't want to comment much on function names.. but I realized
the name of is_export_symbol() isn't really great. The "is_" prefix
strongly indicates that it is only a query function, yet it changes the
state. It makes its caller process_exported_symbols() hard to understand
on the first read.
Masahiro Yamada Aug. 28, 2024, 6:16 p.m. UTC | #4
On Fri, Aug 16, 2024 at 2:39 AM Sami Tolvanen <samitolvanen@google.com> wrote:
>
> Add support for passing a list of exported symbols to gendwarfksyms
> via stdin and filter out non-exported symbols from the output.
>
> Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
> ---
>  scripts/gendwarfksyms/Makefile        |  1 +
>  scripts/gendwarfksyms/dwarf.c         | 53 ++++++++++++++-
>  scripts/gendwarfksyms/gendwarfksyms.c |  4 +-
>  scripts/gendwarfksyms/gendwarfksyms.h | 21 ++++++
>  scripts/gendwarfksyms/symbols.c       | 96 +++++++++++++++++++++++++++
>  5 files changed, 171 insertions(+), 4 deletions(-)
>  create mode 100644 scripts/gendwarfksyms/symbols.c
>
> diff --git a/scripts/gendwarfksyms/Makefile b/scripts/gendwarfksyms/Makefile
> index c1389c161f9c..623f8fc975ea 100644
> --- a/scripts/gendwarfksyms/Makefile
> +++ b/scripts/gendwarfksyms/Makefile
> @@ -2,6 +2,7 @@ hostprogs-always-y += gendwarfksyms
>
>  gendwarfksyms-objs += gendwarfksyms.o
>  gendwarfksyms-objs += dwarf.o
> +gendwarfksyms-objs += symbols.o
>
>  HOST_EXTRACFLAGS := -I $(srctree)/tools/include
>  HOSTLDLIBS_gendwarfksyms := -ldw -lelf
> diff --git a/scripts/gendwarfksyms/dwarf.c b/scripts/gendwarfksyms/dwarf.c
> index 65a29d0bd8f4..71cfab0553da 100644
> --- a/scripts/gendwarfksyms/dwarf.c
> +++ b/scripts/gendwarfksyms/dwarf.c
> @@ -5,6 +5,48 @@
>
>  #include "gendwarfksyms.h"
>
> +static bool get_ref_die_attr(Dwarf_Die *die, unsigned int id, Dwarf_Die *value)
> +{
> +       Dwarf_Attribute da;
> +
> +       /* dwarf_formref_die returns a pointer instead of an error value. */
> +       return dwarf_attr(die, id, &da) && dwarf_formref_die(&da, value);
> +}
> +
> +static const char *get_name(Dwarf_Die *die)
> +{
> +       Dwarf_Attribute attr;
> +
> +       /* rustc uses DW_AT_linkage_name for exported symbols */
> +       if (dwarf_attr(die, DW_AT_linkage_name, &attr) ||
> +           dwarf_attr(die, DW_AT_name, &attr)) {
> +               return dwarf_formstring(&attr);
> +       }
> +
> +       return NULL;
> +}
> +
> +static bool is_export_symbol(struct state *state, Dwarf_Die *die)
> +{
> +       Dwarf_Die *source = die;
> +       Dwarf_Die origin;
> +
> +       state->sym = NULL;
> +
> +       /* If the DIE has an abstract origin, use it for type information. */
> +       if (get_ref_die_attr(die, DW_AT_abstract_origin, &origin))
> +               source = &origin;
> +
> +       state->sym = symbol_get(get_name(die));
> +
> +       /* Look up using the origin name if there are no matches. */
> +       if (!state->sym && source != die)
> +               state->sym = symbol_get(get_name(source));
> +
> +       state->die = *source;
> +       return !!state->sym;
> +}
> +
>  /*
>   * Type string processing
>   */
> @@ -40,7 +82,7 @@ int process_die_container(struct state *state, Dwarf_Die *die,
>  }
>
>  /*
> - * Symbol processing
> + * Exported symbol processing
>   */
>  static int process_subprogram(struct state *state, Dwarf_Die *die)
>  {
> @@ -67,10 +109,15 @@ static int process_exported_symbols(struct state *state, Dwarf_Die *die)
>         /* Possible exported symbols */
>         case DW_TAG_subprogram:
>         case DW_TAG_variable:
> +               if (!is_export_symbol(state, die))
> +                       return 0;
> +
> +               debug("%s", state->sym->name);
> +
>                 if (tag == DW_TAG_subprogram)
> -                       check(process_subprogram(state, die));
> +                       check(process_subprogram(state, &state->die));
>                 else
> -                       check(process_variable(state, die));
> +                       check(process_variable(state, &state->die));
>
>                 return 0;
>         default:
> diff --git a/scripts/gendwarfksyms/gendwarfksyms.c b/scripts/gendwarfksyms/gendwarfksyms.c
> index 27f2d6423c45..d209b237766b 100644
> --- a/scripts/gendwarfksyms/gendwarfksyms.c
> +++ b/scripts/gendwarfksyms/gendwarfksyms.c
> @@ -27,7 +27,7 @@ static const struct {
>
>  static int usage(void)
>  {
> -       error("usage: gendwarfksyms [options] elf-object-file ...");
> +       error("usage: gendwarfksyms [options] elf-object-file ... < symbol-list");
>         return -1;
>  }
>
> @@ -105,6 +105,8 @@ int main(int argc, const char **argv)
>         if (parse_options(argc, argv) < 0)
>                 return usage();
>
> +       check(symbol_read_exports(stdin));



symbol_read_exports() is only called from main().

Do you need to make symbol_read_exports() return
the error code all the way back to the main()
function?

Personally, I'd like to make the program bail out as early as
possible if there is no point in continuing running.

See also this patchset.

https://lore.kernel.org/linux-kbuild/20240812124858.2107328-1-masahiroy@kernel.org/T/#m5c0f795b57588a2c313cd2cc6e24ac95169fd225







> +
>         for (n = 0; n < object_count; n++) {
>                 Dwfl *dwfl;
>                 int fd;
> diff --git a/scripts/gendwarfksyms/gendwarfksyms.h b/scripts/gendwarfksyms/gendwarfksyms.h
> index 5ab7ce7d4efb..03f3e408a839 100644
> --- a/scripts/gendwarfksyms/gendwarfksyms.h
> +++ b/scripts/gendwarfksyms/gendwarfksyms.h
> @@ -7,9 +7,11 @@
>  #include <elfutils/libdw.h>
>  #include <elfutils/libdwfl.h>
>  #include <linux/hashtable.h>
> +#include <linux/jhash.h>
>  #include <inttypes.h>
>  #include <stdlib.h>
>  #include <stdio.h>
> +#include <string.h>
>
>  #ifndef __GENDWARFKSYMS_H
>  #define __GENDWARFKSYMS_H
> @@ -56,6 +58,23 @@ extern bool debug;
>  /* Error == negative values */
>  #define checkp(expr) __check(expr, __res < 0, __res)
>
> +/*
> + * symbols.c
> + */
> +
> +static inline u32 name_hash(const char *name)
> +{
> +       return jhash(name, strlen(name), 0);
> +}
> +
> +struct symbol {
> +       const char *name;
> +       struct hlist_node name_hash;
> +};
> +
> +extern int symbol_read_exports(FILE *file);
> +extern struct symbol *symbol_get(const char *name);
> +
>  /*
>   * dwarf.c
>   */
> @@ -63,6 +82,8 @@ extern bool debug;
>  struct state {
>         Dwfl_Module *mod;
>         Dwarf *dbg;
> +       struct symbol *sym;
> +       Dwarf_Die die;
>  };
>
>  typedef int (*die_callback_t)(struct state *state, Dwarf_Die *die);
> diff --git a/scripts/gendwarfksyms/symbols.c b/scripts/gendwarfksyms/symbols.c
> new file mode 100644
> index 000000000000..673ad9cf9e77
> --- /dev/null
> +++ b/scripts/gendwarfksyms/symbols.c
> @@ -0,0 +1,96 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +/*
> + * Copyright (C) 2024 Google LLC
> + */
> +
> +#include "gendwarfksyms.h"
> +
> +#define SYMBOL_HASH_BITS 15
> +static DEFINE_HASHTABLE(symbol_names, SYMBOL_HASH_BITS);
> +
> +typedef int (*symbol_callback_t)(struct symbol *, void *arg);
> +
> +static int for_each(const char *name, symbol_callback_t func, void *data)
> +{
> +       struct hlist_node *tmp;
> +       struct symbol *match;
> +
> +       if (!name || !*name)
> +               return 0;
> +
> +       hash_for_each_possible_safe(symbol_names, match, tmp, name_hash,
> +                                   name_hash(name)) {
> +               if (strcmp(match->name, name))
> +                       continue;
> +
> +               if (func)
> +                       check(func(match, data));
> +
> +               return 1;
> +       }
> +
> +       return 0;
> +}
> +
> +static bool is_exported(const char *name)
> +{
> +       return checkp(for_each(name, NULL, NULL)) > 0;
> +}
> +
> +int symbol_read_exports(FILE *file)
> +{
> +       struct symbol *sym;
> +       char *line = NULL;
> +       char *name = NULL;
> +       size_t size = 0;
> +       int nsym = 0;
> +
> +       while (getline(&line, &size, file) > 0) {
> +               if (sscanf(line, "%ms\n", &name) != 1) {
> +                       error("malformed input line: %s", line);
> +                       return -1;
> +               }
> +
> +               free(line);
> +               line = NULL;
> +
> +               if (is_exported(name))
> +                       continue; /* Ignore duplicates */
> +
> +               sym = malloc(sizeof(struct symbol));
> +               if (!sym) {
> +                       error("malloc failed");
> +                       return -1;
> +               }
> +
> +               sym->name = name;
> +               name = NULL;

Is this necessary?




> +
> +               hash_add(symbol_names, &sym->name_hash, name_hash(sym->name));
> +               ++nsym;
> +
> +               debug("%s", sym->name);
> +       }
> +
> +       if (line)
> +               free(line);
> +
> +       debug("%d exported symbols", nsym);
> +       return 0;
> +}
> +
> +static int get_symbol(struct symbol *sym, void *arg)
> +{
> +       struct symbol **res = arg;
> +
> +       *res = sym;
> +       return 0;
> +}
> +
> +struct symbol *symbol_get(const char *name)
> +{
> +       struct symbol *sym = NULL;
> +
> +       for_each(name, get_symbol, &sym);
> +       return sym;
> +}
> --
> 2.46.0.184.g6999bdac58-goog
>


--
Best Regards



Masahiro Yamada
Sami Tolvanen Aug. 28, 2024, 9:50 p.m. UTC | #5
On Thu, Aug 29, 2024 at 03:16:21AM +0900, Masahiro Yamada wrote:
> On Fri, Aug 16, 2024 at 2:39 AM Sami Tolvanen <samitolvanen@google.com> wrote:
> > @@ -105,6 +105,8 @@ int main(int argc, const char **argv)
> >         if (parse_options(argc, argv) < 0)
> >                 return usage();
> >
> > +       check(symbol_read_exports(stdin));
> 
> 
> 
> symbol_read_exports() is only called from main().
> 
> Do you need to make symbol_read_exports() return
> the error code all the way back to the main()
> function?
> 
> Personally, I'd like to make the program bail out as early as
> possible if there is no point in continuing running.

That's a valid point. The current error handling prints out a short
trace of exactly where something failed as the error propagates
through the call stack, but bailing out after printing the first
error is probably informative enough. I'll look into cleaning this
up.

> See also this patchset.
> 
> https://lore.kernel.org/linux-kbuild/20240812124858.2107328-1-masahiroy@kernel.org/T/#m5c0f795b57588a2c313cd2cc6e24ac95169fd225

Thanks for the link. In general I prefer to print out an error to
indicate what went wrong, but I suppose memory allocation errors
should be rare enough that it's not necessary. I'll switch to these
in the next version.

> > +int symbol_read_exports(FILE *file)
> > +{
> > +       struct symbol *sym;
> > +       char *line = NULL;
> > +       char *name = NULL;
> > +       size_t size = 0;
> > +       int nsym = 0;
> > +
> > +       while (getline(&line, &size, file) > 0) {
> > +               if (sscanf(line, "%ms\n", &name) != 1) {
> > +                       error("malformed input line: %s", line);
> > +                       return -1;
> > +               }
> > +
> > +               free(line);
> > +               line = NULL;
> > +
> > +               if (is_exported(name))
> > +                       continue; /* Ignore duplicates */
> > +
> > +               sym = malloc(sizeof(struct symbol));
> > +               if (!sym) {
> > +                       error("malloc failed");
> > +                       return -1;
> > +               }
> > +
> > +               sym->name = name;
> > +               name = NULL;
> 
> Is this necessary?

Here, no, but in Petr's cleaned up version it is again necessary, so
you'll see this in v3 still.

Sami
Sami Tolvanen Aug. 28, 2024, 11:09 p.m. UTC | #6
On Wed, Aug 28, 2024 at 02:35:29PM +0200, Petr Pavlu wrote:
> On 8/15/24 19:39, Sami Tolvanen wrote:
> > diff --git a/scripts/gendwarfksyms/dwarf.c b/scripts/gendwarfksyms/dwarf.c
> > index 65a29d0bd8f4..71cfab0553da 100644
> > --- a/scripts/gendwarfksyms/dwarf.c
> > +++ b/scripts/gendwarfksyms/dwarf.c
> > @@ -5,6 +5,48 @@
> > [...]
> > +
> > +static bool is_export_symbol(struct state *state, Dwarf_Die *die)
> > +{
> > +	Dwarf_Die *source = die;
> > +	Dwarf_Die origin;
> > +
> > +	state->sym = NULL;
> > +
> > +	/* If the DIE has an abstract origin, use it for type information. */
> > +	if (get_ref_die_attr(die, DW_AT_abstract_origin, &origin))
> > +		source = &origin;
> > +
> > +	state->sym = symbol_get(get_name(die));
> > +
> > +	/* Look up using the origin name if there are no matches. */
> > +	if (!state->sym && source != die)
> > +		state->sym = symbol_get(get_name(source));
> > +
> > +	state->die = *source;
> > +	return !!state->sym;
> > +}
> 
> Sorry, I don't want to comment much on function names.. but I realized
> the name of is_export_symbol() isn't really great. The "is_" prefix
> strongly indicates that it is only a query function, yet it changes the
> state. It makes its caller process_exported_symbols() hard to understand
> on the first read.

I see your point. How would you make this more obvious? get_ doesn't
seem entirely accurate either. match_ perhaps?

Sami
Masahiro Yamada Sept. 1, 2024, 10:59 a.m. UTC | #7
On Fri, Aug 16, 2024 at 2:39 AM Sami Tolvanen <samitolvanen@google.com> wrote:
>
> Add support for passing a list of exported symbols to gendwarfksyms
> via stdin and filter out non-exported symbols from the output.
>
> Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
> ---

>   */
> @@ -40,7 +82,7 @@ int process_die_container(struct state *state, Dwarf_Die *die,
>  }
>
>  /*
> - * Symbol processing
> + * Exported symbol processing
>   */
>  static int process_subprogram(struct state *state, Dwarf_Die *die)
>  {


I also tend to regard this kind comment line change as a noise.



I think you can squash 02/19 into 01/19 because
this tool does not do anything useful at this point.
Petr Pavlu Sept. 2, 2024, 9:52 a.m. UTC | #8
On 8/29/24 01:09, Sami Tolvanen wrote:
> On Wed, Aug 28, 2024 at 02:35:29PM +0200, Petr Pavlu wrote:
>> On 8/15/24 19:39, Sami Tolvanen wrote:
>>> diff --git a/scripts/gendwarfksyms/dwarf.c b/scripts/gendwarfksyms/dwarf.c
>>> index 65a29d0bd8f4..71cfab0553da 100644
>>> --- a/scripts/gendwarfksyms/dwarf.c
>>> +++ b/scripts/gendwarfksyms/dwarf.c
>>> @@ -5,6 +5,48 @@
>>> [...]
>>> +
>>> +static bool is_export_symbol(struct state *state, Dwarf_Die *die)
>>> +{
>>> +	Dwarf_Die *source = die;
>>> +	Dwarf_Die origin;
>>> +
>>> +	state->sym = NULL;
>>> +
>>> +	/* If the DIE has an abstract origin, use it for type information. */
>>> +	if (get_ref_die_attr(die, DW_AT_abstract_origin, &origin))
>>> +		source = &origin;
>>> +
>>> +	state->sym = symbol_get(get_name(die));
>>> +
>>> +	/* Look up using the origin name if there are no matches. */
>>> +	if (!state->sym && source != die)
>>> +		state->sym = symbol_get(get_name(source));
>>> +
>>> +	state->die = *source;
>>> +	return !!state->sym;
>>> +}
>>
>> Sorry, I don't want to comment much on function names.. but I realized
>> the name of is_export_symbol() isn't really great. The "is_" prefix
>> strongly indicates that it is only a query function, yet it changes the
>> state. It makes its caller process_exported_symbols() hard to understand
>> on the first read.
> 
> I see your point. How would you make this more obvious? get_ doesn't
> seem entirely accurate either. match_ perhaps?

Looks reasonable to me.
Sami Tolvanen Sept. 4, 2024, 8:51 p.m. UTC | #9
On Sun, Sep 1, 2024 at 11:00 AM Masahiro Yamada <masahiroy@kernel.org> wrote:
>
> On Fri, Aug 16, 2024 at 2:39 AM Sami Tolvanen <samitolvanen@google.com> wrote:
> >
> >  /*
> > - * Symbol processing
> > + * Exported symbol processing
> >   */
> >  static int process_subprogram(struct state *state, Dwarf_Die *die)
> >  {
>
>
> I also tend to regard this kind comment line change as a noise.
>
>
>
> I think you can squash 02/19 into 01/19 because
> this tool does not do anything useful at this point.

Yes, that makes sense. I'll squash these in v3.

Sami
diff mbox series

Patch

diff --git a/scripts/gendwarfksyms/Makefile b/scripts/gendwarfksyms/Makefile
index c1389c161f9c..623f8fc975ea 100644
--- a/scripts/gendwarfksyms/Makefile
+++ b/scripts/gendwarfksyms/Makefile
@@ -2,6 +2,7 @@  hostprogs-always-y += gendwarfksyms
 
 gendwarfksyms-objs += gendwarfksyms.o
 gendwarfksyms-objs += dwarf.o
+gendwarfksyms-objs += symbols.o
 
 HOST_EXTRACFLAGS := -I $(srctree)/tools/include
 HOSTLDLIBS_gendwarfksyms := -ldw -lelf
diff --git a/scripts/gendwarfksyms/dwarf.c b/scripts/gendwarfksyms/dwarf.c
index 65a29d0bd8f4..71cfab0553da 100644
--- a/scripts/gendwarfksyms/dwarf.c
+++ b/scripts/gendwarfksyms/dwarf.c
@@ -5,6 +5,48 @@ 
 
 #include "gendwarfksyms.h"
 
+static bool get_ref_die_attr(Dwarf_Die *die, unsigned int id, Dwarf_Die *value)
+{
+	Dwarf_Attribute da;
+
+	/* dwarf_formref_die returns a pointer instead of an error value. */
+	return dwarf_attr(die, id, &da) && dwarf_formref_die(&da, value);
+}
+
+static const char *get_name(Dwarf_Die *die)
+{
+	Dwarf_Attribute attr;
+
+	/* rustc uses DW_AT_linkage_name for exported symbols */
+	if (dwarf_attr(die, DW_AT_linkage_name, &attr) ||
+	    dwarf_attr(die, DW_AT_name, &attr)) {
+		return dwarf_formstring(&attr);
+	}
+
+	return NULL;
+}
+
+static bool is_export_symbol(struct state *state, Dwarf_Die *die)
+{
+	Dwarf_Die *source = die;
+	Dwarf_Die origin;
+
+	state->sym = NULL;
+
+	/* If the DIE has an abstract origin, use it for type information. */
+	if (get_ref_die_attr(die, DW_AT_abstract_origin, &origin))
+		source = &origin;
+
+	state->sym = symbol_get(get_name(die));
+
+	/* Look up using the origin name if there are no matches. */
+	if (!state->sym && source != die)
+		state->sym = symbol_get(get_name(source));
+
+	state->die = *source;
+	return !!state->sym;
+}
+
 /*
  * Type string processing
  */
@@ -40,7 +82,7 @@  int process_die_container(struct state *state, Dwarf_Die *die,
 }
 
 /*
- * Symbol processing
+ * Exported symbol processing
  */
 static int process_subprogram(struct state *state, Dwarf_Die *die)
 {
@@ -67,10 +109,15 @@  static int process_exported_symbols(struct state *state, Dwarf_Die *die)
 	/* Possible exported symbols */
 	case DW_TAG_subprogram:
 	case DW_TAG_variable:
+		if (!is_export_symbol(state, die))
+			return 0;
+
+		debug("%s", state->sym->name);
+
 		if (tag == DW_TAG_subprogram)
-			check(process_subprogram(state, die));
+			check(process_subprogram(state, &state->die));
 		else
-			check(process_variable(state, die));
+			check(process_variable(state, &state->die));
 
 		return 0;
 	default:
diff --git a/scripts/gendwarfksyms/gendwarfksyms.c b/scripts/gendwarfksyms/gendwarfksyms.c
index 27f2d6423c45..d209b237766b 100644
--- a/scripts/gendwarfksyms/gendwarfksyms.c
+++ b/scripts/gendwarfksyms/gendwarfksyms.c
@@ -27,7 +27,7 @@  static const struct {
 
 static int usage(void)
 {
-	error("usage: gendwarfksyms [options] elf-object-file ...");
+	error("usage: gendwarfksyms [options] elf-object-file ... < symbol-list");
 	return -1;
 }
 
@@ -105,6 +105,8 @@  int main(int argc, const char **argv)
 	if (parse_options(argc, argv) < 0)
 		return usage();
 
+	check(symbol_read_exports(stdin));
+
 	for (n = 0; n < object_count; n++) {
 		Dwfl *dwfl;
 		int fd;
diff --git a/scripts/gendwarfksyms/gendwarfksyms.h b/scripts/gendwarfksyms/gendwarfksyms.h
index 5ab7ce7d4efb..03f3e408a839 100644
--- a/scripts/gendwarfksyms/gendwarfksyms.h
+++ b/scripts/gendwarfksyms/gendwarfksyms.h
@@ -7,9 +7,11 @@ 
 #include <elfutils/libdw.h>
 #include <elfutils/libdwfl.h>
 #include <linux/hashtable.h>
+#include <linux/jhash.h>
 #include <inttypes.h>
 #include <stdlib.h>
 #include <stdio.h>
+#include <string.h>
 
 #ifndef __GENDWARFKSYMS_H
 #define __GENDWARFKSYMS_H
@@ -56,6 +58,23 @@  extern bool debug;
 /* Error == negative values */
 #define checkp(expr) __check(expr, __res < 0, __res)
 
+/*
+ * symbols.c
+ */
+
+static inline u32 name_hash(const char *name)
+{
+	return jhash(name, strlen(name), 0);
+}
+
+struct symbol {
+	const char *name;
+	struct hlist_node name_hash;
+};
+
+extern int symbol_read_exports(FILE *file);
+extern struct symbol *symbol_get(const char *name);
+
 /*
  * dwarf.c
  */
@@ -63,6 +82,8 @@  extern bool debug;
 struct state {
 	Dwfl_Module *mod;
 	Dwarf *dbg;
+	struct symbol *sym;
+	Dwarf_Die die;
 };
 
 typedef int (*die_callback_t)(struct state *state, Dwarf_Die *die);
diff --git a/scripts/gendwarfksyms/symbols.c b/scripts/gendwarfksyms/symbols.c
new file mode 100644
index 000000000000..673ad9cf9e77
--- /dev/null
+++ b/scripts/gendwarfksyms/symbols.c
@@ -0,0 +1,96 @@ 
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2024 Google LLC
+ */
+
+#include "gendwarfksyms.h"
+
+#define SYMBOL_HASH_BITS 15
+static DEFINE_HASHTABLE(symbol_names, SYMBOL_HASH_BITS);
+
+typedef int (*symbol_callback_t)(struct symbol *, void *arg);
+
+static int for_each(const char *name, symbol_callback_t func, void *data)
+{
+	struct hlist_node *tmp;
+	struct symbol *match;
+
+	if (!name || !*name)
+		return 0;
+
+	hash_for_each_possible_safe(symbol_names, match, tmp, name_hash,
+				    name_hash(name)) {
+		if (strcmp(match->name, name))
+			continue;
+
+		if (func)
+			check(func(match, data));
+
+		return 1;
+	}
+
+	return 0;
+}
+
+static bool is_exported(const char *name)
+{
+	return checkp(for_each(name, NULL, NULL)) > 0;
+}
+
+int symbol_read_exports(FILE *file)
+{
+	struct symbol *sym;
+	char *line = NULL;
+	char *name = NULL;
+	size_t size = 0;
+	int nsym = 0;
+
+	while (getline(&line, &size, file) > 0) {
+		if (sscanf(line, "%ms\n", &name) != 1) {
+			error("malformed input line: %s", line);
+			return -1;
+		}
+
+		free(line);
+		line = NULL;
+
+		if (is_exported(name))
+			continue; /* Ignore duplicates */
+
+		sym = malloc(sizeof(struct symbol));
+		if (!sym) {
+			error("malloc failed");
+			return -1;
+		}
+
+		sym->name = name;
+		name = NULL;
+
+		hash_add(symbol_names, &sym->name_hash, name_hash(sym->name));
+		++nsym;
+
+		debug("%s", sym->name);
+	}
+
+	if (line)
+		free(line);
+
+	debug("%d exported symbols", nsym);
+	return 0;
+}
+
+static int get_symbol(struct symbol *sym, void *arg)
+{
+	struct symbol **res = arg;
+
+	*res = sym;
+	return 0;
+}
+
+struct symbol *symbol_get(const char *name)
+{
+	struct symbol *sym = NULL;
+
+	for_each(name, get_symbol, &sym);
+	return sym;
+}