diff mbox series

[v2,03/19] gendwarfksyms: Add address matching

Message ID 20240815173903.4172139-24-samitolvanen@google.com (mailing list archive)
State New
Headers show
Series Implement DWARF modversions | expand

Commit Message

Sami Tolvanen Aug. 15, 2024, 5:39 p.m. UTC
The compiler may choose not to emit type information in DWARF for all
aliases, but it's possible for each alias to be exported separately.
To ensure we find type information for the aliases as well, read
{section, address} tuples from the symbol table and match symbols also
by address.

Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
---
 scripts/gendwarfksyms/gendwarfksyms.c |   2 +
 scripts/gendwarfksyms/gendwarfksyms.h |   7 ++
 scripts/gendwarfksyms/symbols.c       | 161 +++++++++++++++++++++++++-
 3 files changed, 165 insertions(+), 5 deletions(-)

Comments

Petr Pavlu Aug. 27, 2024, 12:40 p.m. UTC | #1
On 8/15/24 19:39, Sami Tolvanen wrote:
> The compiler may choose not to emit type information in DWARF for all
> aliases, but it's possible for each alias to be exported separately.
> To ensure we find type information for the aliases as well, read
> {section, address} tuples from the symbol table and match symbols also
> by address.
> 
> Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
> ---
>  scripts/gendwarfksyms/gendwarfksyms.c |   2 +
>  scripts/gendwarfksyms/gendwarfksyms.h |   7 ++
>  scripts/gendwarfksyms/symbols.c       | 161 +++++++++++++++++++++++++-
>  3 files changed, 165 insertions(+), 5 deletions(-)
> 
> diff --git a/scripts/gendwarfksyms/gendwarfksyms.c b/scripts/gendwarfksyms/gendwarfksyms.c
> index d209b237766b..e2f8ee5a4bf3 100644
> --- a/scripts/gendwarfksyms/gendwarfksyms.c
> +++ b/scripts/gendwarfksyms/gendwarfksyms.c
> @@ -118,6 +118,8 @@ int main(int argc, const char **argv)
>  			return -1;
>  		}
>  
> +		check(symbol_read_symtab(fd));
> +
>  		dwfl = dwfl_begin(&callbacks);
>  		if (!dwfl) {
>  			error("dwfl_begin failed for '%s': %s", object_files[n],
> diff --git a/scripts/gendwarfksyms/gendwarfksyms.h b/scripts/gendwarfksyms/gendwarfksyms.h
> index 03f3e408a839..cb9106dfddb9 100644
> --- a/scripts/gendwarfksyms/gendwarfksyms.h
> +++ b/scripts/gendwarfksyms/gendwarfksyms.h
> @@ -61,6 +61,10 @@ extern bool debug;
>  /*
>   * symbols.c
>   */
> +struct symbol_addr {
> +	uint32_t section;
> +	Elf64_Addr address;
> +};
>  
>  static inline u32 name_hash(const char *name)
>  {
> @@ -69,10 +73,13 @@ static inline u32 name_hash(const char *name)
>  
>  struct symbol {
>  	const char *name;
> +	struct symbol_addr addr;
> +	struct hlist_node addr_hash;
>  	struct hlist_node name_hash;
>  };
>  
>  extern int symbol_read_exports(FILE *file);
> +extern int symbol_read_symtab(int fd);
>  extern struct symbol *symbol_get(const char *name);
>  
>  /*
> diff --git a/scripts/gendwarfksyms/symbols.c b/scripts/gendwarfksyms/symbols.c
> index 673ad9cf9e77..f96acb941196 100644
> --- a/scripts/gendwarfksyms/symbols.c
> +++ b/scripts/gendwarfksyms/symbols.c
> @@ -6,11 +6,43 @@
>  #include "gendwarfksyms.h"
>  
>  #define SYMBOL_HASH_BITS 15
> +
> +/* struct symbol_addr -> struct symbol */
> +static DEFINE_HASHTABLE(symbol_addrs, SYMBOL_HASH_BITS);
> +/* name -> struct symbol */
>  static DEFINE_HASHTABLE(symbol_names, SYMBOL_HASH_BITS);
>  
> +static inline u32 symbol_addr_hash(const struct symbol_addr *addr)
> +{
> +	return jhash(addr, sizeof(struct symbol_addr), 0);

I would be careful and avoid including the padding between
symbol_addr.section and symbol_addr.address in the hash calculation.

> +}
> +
>  typedef int (*symbol_callback_t)(struct symbol *, void *arg);
>  
> -static int for_each(const char *name, symbol_callback_t func, void *data)
> +static int __for_each_addr(struct symbol *sym, symbol_callback_t func,
> +			   void *data)
> +{
> +	struct hlist_node *tmp;
> +	struct symbol *match = NULL;
> +	int processed = 0;
> +
> +	hash_for_each_possible_safe(symbol_addrs, match, tmp, addr_hash,
> +				    symbol_addr_hash(&sym->addr)) {
> +		if (match == sym)
> +			continue; /* Already processed */
> +
> +		if (match->addr.section == sym->addr.section &&
> +		    match->addr.address == sym->addr.address) {
> +			check(func(match, data));
> +			++processed;
> +		}
> +	}
> +
> +	return processed;
> +}
> +
> +static int for_each(const char *name, bool name_only, symbol_callback_t func,
> +		    void *data)
>  {
>  	struct hlist_node *tmp;
>  	struct symbol *match;
> @@ -23,9 +55,13 @@ static int for_each(const char *name, symbol_callback_t func, void *data)
>  		if (strcmp(match->name, name))
>  			continue;
>  
> +		/* Call func for the match, and all address matches */
>  		if (func)
>  			check(func(match, data));
>  
> +		if (!name_only && match->addr.section != SHN_UNDEF)
> +			return checkp(__for_each_addr(match, func, data)) + 1;
> +
>  		return 1;
>  	}
>  
> @@ -34,7 +70,7 @@ static int for_each(const char *name, symbol_callback_t func, void *data)
>  
>  static bool is_exported(const char *name)
>  {
> -	return checkp(for_each(name, NULL, NULL)) > 0;
> +	return checkp(for_each(name, true, NULL, NULL)) > 0;
>  }
>  
>  int symbol_read_exports(FILE *file)
> @@ -57,13 +93,14 @@ int symbol_read_exports(FILE *file)
>  		if (is_exported(name))
>  			continue; /* Ignore duplicates */
>  
> -		sym = malloc(sizeof(struct symbol));
> +		sym = calloc(1, sizeof(struct symbol));
>  		if (!sym) {
> -			error("malloc failed");
> +			error("calloc failed");
>  			return -1;
>  		}
>  
>  		sym->name = name;
> +		sym->addr.section = SHN_UNDEF;
>  		name = NULL;
>  
>  		hash_add(symbol_names, &sym->name_hash, name_hash(sym->name));
> @@ -91,6 +128,120 @@ struct symbol *symbol_get(const char *name)
>  {
>  	struct symbol *sym = NULL;
>  
> -	for_each(name, get_symbol, &sym);
> +	for_each(name, false, get_symbol, &sym);
>  	return sym;
>  }
> +
> +typedef int (*elf_symbol_callback_t)(const char *name, GElf_Sym *sym,
> +				     Elf32_Word xndx, void *arg);
> +
> +static int elf_for_each_symbol(int fd, elf_symbol_callback_t func, void *arg)
> +{
> +	size_t sym_size;
> +	GElf_Shdr shdr_mem;
> +	GElf_Shdr *shdr;
> +	Elf_Data *xndx_data = NULL;
> +	Elf_Scn *scn;
> +	Elf *elf;
> +
> +	if (elf_version(EV_CURRENT) != EV_CURRENT) {
> +		error("elf_version failed: %s", elf_errmsg(-1));
> +		return -1;
> +	}
> +
> +	elf = elf_begin(fd, ELF_C_READ_MMAP, NULL);
> +	if (!elf) {
> +		error("elf_begin failed: %s", elf_errmsg(-1));
> +		return -1;
> +	}
> +
> +	sym_size = gelf_getclass(elf) == ELFCLASS32 ? sizeof(Elf32_Sym) :
> +						      sizeof(Elf64_Sym);
> +
> +	scn = elf_nextscn(elf, NULL);
> +
> +	while (scn) {
> +		shdr = gelf_getshdr(scn, &shdr_mem);
> +
> +		if (shdr && shdr->sh_type == SHT_SYMTAB_SHNDX) {
> +			xndx_data = elf_getdata(scn, NULL);
> +			break;
> +		}
> +
> +		scn = elf_nextscn(elf, scn);
> +	}
> +
> +	scn = elf_nextscn(elf, NULL);
> +
> +	while (scn) {
> +		shdr = gelf_getshdr(scn, &shdr_mem);
> +
> +		if (shdr && shdr->sh_type == SHT_SYMTAB) {
> +			Elf_Data *data = elf_getdata(scn, NULL);
> +			unsigned int nsyms = data->d_size / sym_size;

I think strictly speaking this should be:
size_t nsyms = shdr->sh_size / shdr->sh_entsize;
... and the code could check that shdr->sh_entsize is the same as what
gelf_fsize(elf, ELF_T_SYM, 1, EV_CURRENT) returns.

> +			unsigned int n;
> +
> +			for (n = 0; n < nsyms; ++n) {

The first symbol in the symbol table is always undefined, the loop can
start from 1.

Alternatively, since elf_for_each_symbol() ends up in the entire series
being used only with process_symbol() which skips symbols with the local
binding, the function could be renamed to elf_for_each_global_symbol()
and start the loop from shdr->sh_info.

> +				const char *name = NULL;
> +				Elf32_Word xndx = 0;
> +				GElf_Sym sym_mem;
> +				GElf_Sym *sym;
> +
> +				sym = gelf_getsymshndx(data, xndx_data, n,
> +						       &sym_mem, &xndx);
> +
> +				if (sym->st_shndx != SHN_XINDEX)
> +					xndx = sym->st_shndx;
> +
> +				name = elf_strptr(elf, shdr->sh_link,
> +						  sym->st_name);
> +
> +				/* Skip empty symbol names */
> +				if (name && *name &&
> +				    checkp(func(name, sym, xndx, arg)) > 0)
> +					break;
> +			}
> +		}
> +
> +		scn = elf_nextscn(elf, scn);
> +	}
> +
> +	return check(elf_end(elf));
> +}
> +
> +static int set_symbol_addr(struct symbol *sym, void *arg)
> +{
> +	struct symbol_addr *addr = arg;
> +
> +	if (sym->addr.section == SHN_UNDEF) {
> +		sym->addr.section = addr->section;
> +		sym->addr.address = addr->address;
> +		hash_add(symbol_addrs, &sym->addr_hash,
> +			 symbol_addr_hash(&sym->addr));
> +
> +		debug("%s -> { %u, %lx }", sym->name, sym->addr.section,
> +		      sym->addr.address);
> +	} else {
> +		warn("multiple addresses for symbol %s?", sym->name);
> +	}
> +
> +	return 0;
> +}
> +
> +static int process_symbol(const char *name, GElf_Sym *sym, Elf32_Word xndx,
> +			  void *arg)
> +{
> +	struct symbol_addr addr = { .section = xndx, .address = sym->st_value };
> +
> +	/* Set addresses for exported symbols */
> +	if (GELF_ST_BIND(sym->st_info) != STB_LOCAL &&
> +	    addr.section != SHN_UNDEF)
> +		checkp(for_each(name, true, set_symbol_addr, &addr));
> +
> +	return 0;
> +}
> +
> +int symbol_read_symtab(int fd)
> +{
> +	return elf_for_each_symbol(fd, process_symbol, NULL);
> +}
Sami Tolvanen Aug. 27, 2024, 9:28 p.m. UTC | #2
Hi Petr,

On Tue, Aug 27, 2024 at 12:40 PM Petr Pavlu <petr.pavlu@suse.com> wrote:
>
> On 8/15/24 19:39, Sami Tolvanen wrote:
> > +static inline u32 symbol_addr_hash(const struct symbol_addr *addr)
> > +{
> > +     return jhash(addr, sizeof(struct symbol_addr), 0);
>
> I would be careful and avoid including the padding between
> symbol_addr.section and symbol_addr.address in the hash calculation.

Good catch. I'll fix this in the next version.

> > +static int elf_for_each_symbol(int fd, elf_symbol_callback_t func, void *arg)
> > +{
> > +     size_t sym_size;
> > +     GElf_Shdr shdr_mem;
> > +     GElf_Shdr *shdr;
> > +     Elf_Data *xndx_data = NULL;
> > +     Elf_Scn *scn;
> > +     Elf *elf;
> > +
> > +     if (elf_version(EV_CURRENT) != EV_CURRENT) {
> > +             error("elf_version failed: %s", elf_errmsg(-1));
> > +             return -1;
> > +     }
> > +
> > +     elf = elf_begin(fd, ELF_C_READ_MMAP, NULL);
> > +     if (!elf) {
> > +             error("elf_begin failed: %s", elf_errmsg(-1));
> > +             return -1;
> > +     }
> > +
> > +     sym_size = gelf_getclass(elf) == ELFCLASS32 ? sizeof(Elf32_Sym) :
> > +                                                   sizeof(Elf64_Sym);
> > +
> > +     scn = elf_nextscn(elf, NULL);
> > +
> > +     while (scn) {
> > +             shdr = gelf_getshdr(scn, &shdr_mem);
> > +
> > +             if (shdr && shdr->sh_type == SHT_SYMTAB_SHNDX) {
> > +                     xndx_data = elf_getdata(scn, NULL);
> > +                     break;
> > +             }
> > +
> > +             scn = elf_nextscn(elf, scn);
> > +     }
> > +
> > +     scn = elf_nextscn(elf, NULL);
> > +
> > +     while (scn) {
> > +             shdr = gelf_getshdr(scn, &shdr_mem);
> > +
> > +             if (shdr && shdr->sh_type == SHT_SYMTAB) {
> > +                     Elf_Data *data = elf_getdata(scn, NULL);
> > +                     unsigned int nsyms = data->d_size / sym_size;
>
> I think strictly speaking this should be:
> size_t nsyms = shdr->sh_size / shdr->sh_entsize;
> ... and the code could check that shdr->sh_entsize is the same as what
> gelf_fsize(elf, ELF_T_SYM, 1, EV_CURRENT) returns.

Sure, I can change this. I'm not sure if there's a situation where the
current calculation wouldn't result in the exact same result though.

> > +                     unsigned int n;
> > +
> > +                     for (n = 0; n < nsyms; ++n) {
>
> The first symbol in the symbol table is always undefined, the loop can
> start from 1.

Ack.

> Alternatively, since elf_for_each_symbol() ends up in the entire series
> being used only with process_symbol() which skips symbols with the local
> binding, the function could be renamed to elf_for_each_global_symbol()
> and start the loop from shdr->sh_info.

Patch 15 ("Add support for declaration-only data structures") actually
also needs to process local symbols, so we can't skip them completely.

Sami
Masahiro Yamada Aug. 28, 2024, 6:22 p.m. UTC | #3
On Fri, Aug 16, 2024 at 2:39 AM Sami Tolvanen <samitolvanen@google.com> wrote:
>
> The compiler may choose not to emit type information in DWARF for all
> aliases, but it's possible for each alias to be exported separately.
> To ensure we find type information for the aliases as well, read
> {section, address} tuples from the symbol table and match symbols also
> by address.
>
> Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
> ---
>  scripts/gendwarfksyms/gendwarfksyms.c |   2 +
>  scripts/gendwarfksyms/gendwarfksyms.h |   7 ++
>  scripts/gendwarfksyms/symbols.c       | 161 +++++++++++++++++++++++++-
>  3 files changed, 165 insertions(+), 5 deletions(-)
>
> diff --git a/scripts/gendwarfksyms/gendwarfksyms.c b/scripts/gendwarfksyms/gendwarfksyms.c
> index d209b237766b..e2f8ee5a4bf3 100644
> --- a/scripts/gendwarfksyms/gendwarfksyms.c
> +++ b/scripts/gendwarfksyms/gendwarfksyms.c
> @@ -118,6 +118,8 @@ int main(int argc, const char **argv)
>                         return -1;
>                 }
>
> +               check(symbol_read_symtab(fd));
> +
>                 dwfl = dwfl_begin(&callbacks);
>                 if (!dwfl) {
>                         error("dwfl_begin failed for '%s': %s", object_files[n],
> diff --git a/scripts/gendwarfksyms/gendwarfksyms.h b/scripts/gendwarfksyms/gendwarfksyms.h
> index 03f3e408a839..cb9106dfddb9 100644
> --- a/scripts/gendwarfksyms/gendwarfksyms.h
> +++ b/scripts/gendwarfksyms/gendwarfksyms.h
> @@ -61,6 +61,10 @@ extern bool debug;
>  /*
>   * symbols.c
>   */
> +struct symbol_addr {
> +       uint32_t section;
> +       Elf64_Addr address;
> +};
>
>  static inline u32 name_hash(const char *name)
>  {
> @@ -69,10 +73,13 @@ static inline u32 name_hash(const char *name)
>
>  struct symbol {
>         const char *name;
> +       struct symbol_addr addr;
> +       struct hlist_node addr_hash;
>         struct hlist_node name_hash;
>  };
>
>  extern int symbol_read_exports(FILE *file);
> +extern int symbol_read_symtab(int fd);
>  extern struct symbol *symbol_get(const char *name);
>
>  /*
> diff --git a/scripts/gendwarfksyms/symbols.c b/scripts/gendwarfksyms/symbols.c
> index 673ad9cf9e77..f96acb941196 100644
> --- a/scripts/gendwarfksyms/symbols.c
> +++ b/scripts/gendwarfksyms/symbols.c
> @@ -6,11 +6,43 @@
>  #include "gendwarfksyms.h"
>
>  #define SYMBOL_HASH_BITS 15
> +
> +/* struct symbol_addr -> struct symbol */
> +static DEFINE_HASHTABLE(symbol_addrs, SYMBOL_HASH_BITS);
> +/* name -> struct symbol */
>  static DEFINE_HASHTABLE(symbol_names, SYMBOL_HASH_BITS);
>
> +static inline u32 symbol_addr_hash(const struct symbol_addr *addr)
> +{
> +       return jhash(addr, sizeof(struct symbol_addr), 0);
> +}
> +
>  typedef int (*symbol_callback_t)(struct symbol *, void *arg);
>
> -static int for_each(const char *name, symbol_callback_t func, void *data)
> +static int __for_each_addr(struct symbol *sym, symbol_callback_t func,
> +                          void *data)
> +{
> +       struct hlist_node *tmp;
> +       struct symbol *match = NULL;
> +       int processed = 0;
> +
> +       hash_for_each_possible_safe(symbol_addrs, match, tmp, addr_hash,
> +                                   symbol_addr_hash(&sym->addr)) {
> +               if (match == sym)
> +                       continue; /* Already processed */
> +
> +               if (match->addr.section == sym->addr.section &&
> +                   match->addr.address == sym->addr.address) {
> +                       check(func(match, data));
> +                       ++processed;
> +               }
> +       }
> +
> +       return processed;
> +}
> +
> +static int for_each(const char *name, bool name_only, symbol_callback_t func,
> +                   void *data)
>  {
>         struct hlist_node *tmp;
>         struct symbol *match;
> @@ -23,9 +55,13 @@ static int for_each(const char *name, symbol_callback_t func, void *data)
>                 if (strcmp(match->name, name))
>                         continue;
>
> +               /* Call func for the match, and all address matches */
>                 if (func)
>                         check(func(match, data));
>
> +               if (!name_only && match->addr.section != SHN_UNDEF)
> +                       return checkp(__for_each_addr(match, func, data)) + 1;
> +
>                 return 1;
>         }
>
> @@ -34,7 +70,7 @@ static int for_each(const char *name, symbol_callback_t func, void *data)
>
>  static bool is_exported(const char *name)
>  {
> -       return checkp(for_each(name, NULL, NULL)) > 0;
> +       return checkp(for_each(name, true, NULL, NULL)) > 0;
>  }
>
>  int symbol_read_exports(FILE *file)
> @@ -57,13 +93,14 @@ int symbol_read_exports(FILE *file)
>                 if (is_exported(name))
>                         continue; /* Ignore duplicates */
>
> -               sym = malloc(sizeof(struct symbol));
> +               sym = calloc(1, sizeof(struct symbol));



I am tired of noise changes when reviewing this patch set.


2/19 added malloc(), which is immediately replaced with calloc() by 3/19.










--
Best Regards

Masahiro Yamada
Sami Tolvanen Aug. 28, 2024, 9:56 p.m. UTC | #4
On Thu, Aug 29, 2024 at 03:22:25AM +0900, Masahiro Yamada wrote:
> On Fri, Aug 16, 2024 at 2:39 AM Sami Tolvanen <samitolvanen@google.com> wrote:
> >  int symbol_read_exports(FILE *file)
> > @@ -57,13 +93,14 @@ int symbol_read_exports(FILE *file)
> >                 if (is_exported(name))
> >                         continue; /* Ignore duplicates */
> >
> > -               sym = malloc(sizeof(struct symbol));
> > +               sym = calloc(1, sizeof(struct symbol));
> 
> 
> 
> I am tired of noise changes when reviewing this patch set.
> 
> 
> 2/19 added malloc(), which is immediately replaced with calloc() by 3/19.

This was changed to calloc because the structure now has a new field
that should be zero-initialized, but I do agree, this could have
just been a calloc from the beginning. I'll change this in the next
version.

I did try to make sure there wouldn't be too much churn in the series,
but clearly I've missed a few places. Hopefully there's nothing
equally egregious in the later patches!

Sami
Masahiro Yamada Sept. 1, 2024, 11:10 a.m. UTC | #5
On Fri, Aug 16, 2024 at 2:39 AM Sami Tolvanen <samitolvanen@google.com> wrote:
>
> The compiler may choose not to emit type information in DWARF for all
> aliases, but it's possible for each alias to be exported separately.
> To ensure we find type information for the aliases as well, read
> {section, address} tuples from the symbol table and match symbols also
> by address.
>
> Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
> ---

> +
> +static int set_symbol_addr(struct symbol *sym, void *arg)
> +{
> +       struct symbol_addr *addr = arg;
> +
> +       if (sym->addr.section == SHN_UNDEF) {
> +               sym->addr.section = addr->section;
> +               sym->addr.address = addr->address;


These two lines can be replaced with

                  sym->addr = *addr;





--
Best Regards
Masahiro Yamada
Sami Tolvanen Sept. 4, 2024, 8:48 p.m. UTC | #6
Hi,

On Sun, Sep 1, 2024 at 11:11 AM Masahiro Yamada <masahiroy@kernel.org> wrote:
>
> On Fri, Aug 16, 2024 at 2:39 AM Sami Tolvanen <samitolvanen@google.com> wrote:
> >
> > +static int set_symbol_addr(struct symbol *sym, void *arg)
> > +{
> > +       struct symbol_addr *addr = arg;
> > +
> > +       if (sym->addr.section == SHN_UNDEF) {
> > +               sym->addr.section = addr->section;
> > +               sym->addr.address = addr->address;
>
>
> These two lines can be replaced with
>
>                   sym->addr = *addr;

Good point, I'll fix this in the next version.

Sami
diff mbox series

Patch

diff --git a/scripts/gendwarfksyms/gendwarfksyms.c b/scripts/gendwarfksyms/gendwarfksyms.c
index d209b237766b..e2f8ee5a4bf3 100644
--- a/scripts/gendwarfksyms/gendwarfksyms.c
+++ b/scripts/gendwarfksyms/gendwarfksyms.c
@@ -118,6 +118,8 @@  int main(int argc, const char **argv)
 			return -1;
 		}
 
+		check(symbol_read_symtab(fd));
+
 		dwfl = dwfl_begin(&callbacks);
 		if (!dwfl) {
 			error("dwfl_begin failed for '%s': %s", object_files[n],
diff --git a/scripts/gendwarfksyms/gendwarfksyms.h b/scripts/gendwarfksyms/gendwarfksyms.h
index 03f3e408a839..cb9106dfddb9 100644
--- a/scripts/gendwarfksyms/gendwarfksyms.h
+++ b/scripts/gendwarfksyms/gendwarfksyms.h
@@ -61,6 +61,10 @@  extern bool debug;
 /*
  * symbols.c
  */
+struct symbol_addr {
+	uint32_t section;	/* section index; SHN_UNDEF if not set */
+	Elf64_Addr address;	/* symbol value (st_value) */
+};
 
 static inline u32 name_hash(const char *name)
 {
@@ -69,10 +73,13 @@  static inline u32 name_hash(const char *name)
 
 struct symbol {
 	const char *name;
+	struct symbol_addr addr;	/* set from the ELF symtab */
+	struct hlist_node addr_hash;	/* node in symbol_addrs */
 	struct hlist_node name_hash;
 };
 
 extern int symbol_read_exports(FILE *file);
+extern int symbol_read_symtab(int fd);
 extern struct symbol *symbol_get(const char *name);
 
 /*
diff --git a/scripts/gendwarfksyms/symbols.c b/scripts/gendwarfksyms/symbols.c
index 673ad9cf9e77..f96acb941196 100644
--- a/scripts/gendwarfksyms/symbols.c
+++ b/scripts/gendwarfksyms/symbols.c
@@ -6,11 +6,56 @@ 
 #include "gendwarfksyms.h"
 
 #define SYMBOL_HASH_BITS 15
+
+/* struct symbol_addr -> struct symbol */
+static DEFINE_HASHTABLE(symbol_addrs, SYMBOL_HASH_BITS);
+/* name -> struct symbol */
 static DEFINE_HASHTABLE(symbol_names, SYMBOL_HASH_BITS);
 
+/*
+ * Hash a {section, address} tuple. Only the two members are fed to
+ * jhash(): hashing the whole struct would also read the padding between
+ * them, which is not guaranteed to hold the same bytes for equal tuples.
+ * The section index is mixed in as the initial value.
+ */
+static inline u32 symbol_addr_hash(const struct symbol_addr *addr)
+{
+	return jhash(&addr->address, sizeof(addr->address), addr->section);
+}
+
 typedef int (*symbol_callback_t)(struct symbol *, void *arg);
 
-static int for_each(const char *name, symbol_callback_t func, void *data)
+/*
+ * Call func for every symbol that shares sym's {section, address},
+ * excluding sym itself, and return how many were visited. A NULL func
+ * only counts, matching how for_each() treats a NULL callback.
+ */
+static int __for_each_addr(struct symbol *sym, symbol_callback_t func,
+			   void *data)
+{
+	struct hlist_node *tmp;
+	struct symbol *match = NULL;
+	int processed = 0;
+
+	hash_for_each_possible_safe(symbol_addrs, match, tmp, addr_hash,
+				    symbol_addr_hash(&sym->addr)) {
+		if (match == sym)
+			continue; /* Already processed */
+
+		/* Entries visited here are candidates; compare the members */
+		if (match->addr.section == sym->addr.section &&
+		    match->addr.address == sym->addr.address) {
+			if (func)
+				check(func(match, data));
+			++processed;
+		}
+	}
+
+	return processed;
+}
+
+static int for_each(const char *name, bool name_only, symbol_callback_t func,
+		    void *data)
 {
 	struct hlist_node *tmp;
 	struct symbol *match;
@@ -23,9 +55,13 @@  static int for_each(const char *name, symbol_callback_t func, void *data)
 		if (strcmp(match->name, name))
 			continue;
 
+		/* Call func for the match, and all address matches */
 		if (func)
 			check(func(match, data));
 
+		if (!name_only && match->addr.section != SHN_UNDEF)
+			return checkp(__for_each_addr(match, func, data)) + 1;
+
 		return 1;
 	}
 
@@ -34,7 +70,7 @@  static int for_each(const char *name, symbol_callback_t func, void *data)
 
 static bool is_exported(const char *name)
 {
-	return checkp(for_each(name, NULL, NULL)) > 0;
+	return checkp(for_each(name, true, NULL, NULL)) > 0; /* by name only */
 }
 
 int symbol_read_exports(FILE *file)
@@ -57,13 +93,14 @@  int symbol_read_exports(FILE *file)
 		if (is_exported(name))
 			continue; /* Ignore duplicates */
 
-		sym = malloc(sizeof(struct symbol));
+		sym = calloc(1, sizeof(struct symbol));
 		if (!sym) {
-			error("malloc failed");
+			error("calloc failed");
 			return -1;
 		}
 
 		sym->name = name;
+		sym->addr.section = SHN_UNDEF;
 		name = NULL;
 
 		hash_add(symbol_names, &sym->name_hash, name_hash(sym->name));
@@ -91,6 +128,173 @@  struct symbol *symbol_get(const char *name)
 {
 	struct symbol *sym = NULL;
 
-	for_each(name, get_symbol, &sym);
+	for_each(name, false, get_symbol, &sym);
 	return sym;
 }
+
+typedef int (*elf_symbol_callback_t)(const char *name, GElf_Sym *sym,
+				     Elf32_Word xndx, void *arg);
+
+/*
+ * Call func for each named symbol in every SHT_SYMTAB section of the ELF
+ * file open on fd. xndx is the symbol's section index, taken from the
+ * SHT_SYMTAB_SHNDX table when st_shndx is SHN_XINDEX. A positive return
+ * value from func ends the walk of the current symbol table. Returns -1
+ * if libelf fails or the symbol table entry size is unexpected.
+ */
+static int elf_for_each_symbol(int fd, elf_symbol_callback_t func, void *arg)
+{
+	size_t sym_size;
+	GElf_Shdr shdr_mem;
+	GElf_Shdr *shdr;
+	Elf_Data *xndx_data = NULL;
+	Elf_Scn *scn;
+	Elf *elf;
+
+	if (elf_version(EV_CURRENT) != EV_CURRENT) {
+		error("elf_version failed: %s", elf_errmsg(-1));
+		return -1;
+	}
+
+	elf = elf_begin(fd, ELF_C_READ_MMAP, NULL);
+	if (!elf) {
+		error("elf_begin failed: %s", elf_errmsg(-1));
+		return -1;
+	}
+
+	/* File size of one symbol entry for this ELF class; 0 on error */
+	sym_size = gelf_fsize(elf, ELF_T_SYM, 1, EV_CURRENT);
+
+	/* Locate the extended section index table, if there is one */
+	scn = elf_nextscn(elf, NULL);
+
+	while (scn) {
+		shdr = gelf_getshdr(scn, &shdr_mem);
+
+		if (shdr && shdr->sh_type == SHT_SYMTAB_SHNDX) {
+			xndx_data = elf_getdata(scn, NULL);
+			if (!xndx_data) {
+				error("elf_getdata failed: %s",
+				      elf_errmsg(-1));
+				goto err;
+			}
+			break;
+		}
+
+		scn = elf_nextscn(elf, scn);
+	}
+
+	scn = elf_nextscn(elf, NULL);
+
+	while (scn) {
+		shdr = gelf_getshdr(scn, &shdr_mem);
+
+		if (shdr && shdr->sh_type == SHT_SYMTAB) {
+			Elf_Data *data = elf_getdata(scn, NULL);
+			size_t nsyms;
+			size_t n;
+
+			if (!data) {
+				error("elf_getdata failed: %s",
+				      elf_errmsg(-1));
+				goto err;
+			}
+
+			if (!sym_size || shdr->sh_entsize != sym_size) {
+				error("unexpected symbol table entry size");
+				goto err;
+			}
+
+			nsyms = shdr->sh_size / shdr->sh_entsize;
+
+			/* Entry 0 is the reserved undefined symbol */
+			for (n = 1; n < nsyms; ++n) {
+				const char *name = NULL;
+				Elf32_Word xndx = 0;
+				GElf_Sym sym_mem;
+				GElf_Sym *sym;
+
+				sym = gelf_getsymshndx(data, xndx_data, n,
+						       &sym_mem, &xndx);
+				if (!sym) {
+					error("gelf_getsymshndx failed: %s",
+					      elf_errmsg(-1));
+					goto err;
+				}
+
+				if (sym->st_shndx != SHN_XINDEX)
+					xndx = sym->st_shndx;
+
+				name = elf_strptr(elf, shdr->sh_link,
+						  sym->st_name);
+
+				/*
+				 * Skip empty symbol names.
+				 * NOTE(review): if checkp() returns from
+				 * this function on failure, elf is not
+				 * released on that path -- confirm.
+				 */
+				if (name && *name &&
+				    checkp(func(name, sym, xndx, arg)) > 0)
+					break;
+			}
+		}
+
+		scn = elf_nextscn(elf, scn);
+	}
+
+	return check(elf_end(elf));
+
+err:
+	elf_end(elf);
+	return -1;
+}
+
+/*
+ * for_each() callback: record the {section, address} in arg for sym and
+ * add sym to the address hash table. A symbol that already has an
+ * address keeps it and a warning is printed.
+ */
+static int set_symbol_addr(struct symbol *sym, void *arg)
+{
+	struct symbol_addr *addr = arg;
+
+	if (sym->addr.section == SHN_UNDEF) {
+		sym->addr.section = addr->section;
+		sym->addr.address = addr->address;
+		hash_add(symbol_addrs, &sym->addr_hash,
+			 symbol_addr_hash(&sym->addr));
+
+		/* Elf64_Addr is not unsigned long on every host */
+		debug("%s -> { %u, %llx }", sym->name, sym->addr.section,
+		      (unsigned long long)sym->addr.address);
+	} else {
+		warn("multiple addresses for symbol %s?", sym->name);
+	}
+
+	return 0;
+}
+
+/*
+ * elf_for_each_symbol() callback: for a non-local symbol defined in a
+ * section, set the address of the exported symbol with the same name,
+ * if there is one. The final return of 0 lets the walk continue.
+ */
+static int process_symbol(const char *name, GElf_Sym *sym, Elf32_Word xndx,
+			  void *arg)
+{
+	struct symbol_addr addr = { .section = xndx, .address = sym->st_value };
+
+	/* Set addresses for exported symbols */
+	if (GELF_ST_BIND(sym->st_info) != STB_LOCAL &&
+	    addr.section != SHN_UNDEF)
+		checkp(for_each(name, true, set_symbol_addr, &addr));
+
+	return 0;
+}
+
+/* Read symbol addresses for exported symbols from the ELF file on fd. */
+int symbol_read_symtab(int fd)
+{
+	return elf_for_each_symbol(fd, process_symbol, NULL);
+}