diff mbox series

[v2,13/19] gendwarfksyms: Add symtypes output

Message ID 20240815173903.4172139-34-samitolvanen@google.com (mailing list archive)
State Handled Elsewhere
Headers show
Series Implement DWARF modversions | expand

Commit Message

Sami Tolvanen Aug. 15, 2024, 5:39 p.m. UTC
Add support for producing genksyms-style symtypes files. Process
die_map to find the longest expansions for each type, and use symtypes
references in type definitions. The basic file format is similar to
genksyms, with two notable exceptions:

  1. Type names with spaces (common with Rust) in references are
     wrapped in single quotes. E.g.:

     s#'core::result::Result<u8, core::num::error::ParseIntError>'

  2. The actual type definition is the simple parsed DWARF format we
     output with --dump-dies, not the preprocessed C-style format
     genksyms produces.

Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
---
 scripts/gendwarfksyms/Makefile        |   1 +
 scripts/gendwarfksyms/die.c           |  13 +
 scripts/gendwarfksyms/dwarf.c         |  14 +-
 scripts/gendwarfksyms/gendwarfksyms.c |  28 +-
 scripts/gendwarfksyms/gendwarfksyms.h |  21 +-
 scripts/gendwarfksyms/symbols.c       |  11 +-
 scripts/gendwarfksyms/types.c         | 439 ++++++++++++++++++++++++++
 7 files changed, 517 insertions(+), 10 deletions(-)
 create mode 100644 scripts/gendwarfksyms/types.c

Comments

Petr Pavlu Sept. 10, 2024, 2:58 p.m. UTC | #1
On 8/15/24 19:39, Sami Tolvanen wrote:
> Add support for producing genksyms-style symtypes files. Process
> die_map to find the longest expansions for each type, and use symtypes
> references in type definitions. The basic file format is similar to
> genksyms, with two notable exceptions:
> 
>   1. Type names with spaces (common with Rust) in references are
>      wrapped in single quotes. E.g.:
> 
>      s#'core::result::Result<u8, core::num::error::ParseIntError>'
> 
>   2. The actual type definition is the simple parsed DWARF format we
>      output with --dump-dies, not the preprocessed C-style format
>      genksyms produces.

Thank you for adding this output to the tool.

> 
> Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
> ---
>  scripts/gendwarfksyms/Makefile        |   1 +
>  scripts/gendwarfksyms/die.c           |  13 +
>  scripts/gendwarfksyms/dwarf.c         |  14 +-
>  scripts/gendwarfksyms/gendwarfksyms.c |  28 +-
>  scripts/gendwarfksyms/gendwarfksyms.h |  21 +-
>  scripts/gendwarfksyms/symbols.c       |  11 +-
>  scripts/gendwarfksyms/types.c         | 439 ++++++++++++++++++++++++++
>  7 files changed, 517 insertions(+), 10 deletions(-)
>  create mode 100644 scripts/gendwarfksyms/types.c
> 
> diff --git a/scripts/gendwarfksyms/Makefile b/scripts/gendwarfksyms/Makefile
> index 681b42441840..4866a2fd0e46 100644
> --- a/scripts/gendwarfksyms/Makefile
> +++ b/scripts/gendwarfksyms/Makefile
> @@ -5,6 +5,7 @@ gendwarfksyms-objs += cache.o
>  gendwarfksyms-objs += die.o
>  gendwarfksyms-objs += dwarf.o
>  gendwarfksyms-objs += symbols.o
> +gendwarfksyms-objs += types.o
>  
>  HOST_EXTRACFLAGS := -I $(srctree)/tools/include
>  HOSTLDLIBS_gendwarfksyms := -ldw -lelf
> diff --git a/scripts/gendwarfksyms/die.c b/scripts/gendwarfksyms/die.c
> index fdd52df88fdd..e40f04b70f7f 100644
> --- a/scripts/gendwarfksyms/die.c
> +++ b/scripts/gendwarfksyms/die.c
> @@ -85,6 +85,19 @@ static void reset_die(struct die *cd)
>  	cd->list = NULL;
>  }
>  
> +int die_map_for_each(die_map_callback_t func, void *arg)
> +{
> +	struct die *cd;
> +	struct hlist_node *tmp;
> +	int i;
> +
> +	hash_for_each_safe(die_map, i, tmp, cd, hash) {
> +		check(func(cd, arg));
> +	}
> +
> +	return 0;
> +}
> +
>  void die_map_free(void)
>  {
>  	struct hlist_node *tmp;
> diff --git a/scripts/gendwarfksyms/dwarf.c b/scripts/gendwarfksyms/dwarf.c
> index 9bca21a71639..62241cc97a76 100644
> --- a/scripts/gendwarfksyms/dwarf.c
> +++ b/scripts/gendwarfksyms/dwarf.c
> @@ -60,11 +60,11 @@ static bool is_export_symbol(struct state *state, Dwarf_Die *die)
>  	if (get_ref_die_attr(die, DW_AT_abstract_origin, &origin))
>  		source = &origin;
>  
> -	state->sym = symbol_get(get_name(die));
> +	state->sym = symbol_get_unprocessed(get_name(die));
>  
>  	/* Look up using the origin name if there are no matches. */
>  	if (!state->sym && source != die)
> -		state->sym = symbol_get(get_name(source));
> +		state->sym = symbol_get_unprocessed(get_name(source));
>  
>  	state->die = *source;
>  	return !!state->sym;
> @@ -384,6 +384,7 @@ static int process_subroutine_type(struct state *state, struct die *cache,
>  	return check(__process_subroutine_type(state, cache, die,
>  					       "subroutine_type"));
>  }
> +
>  static int process_variant_type(struct state *state, struct die *cache,
>  				Dwarf_Die *die)
>  {
> @@ -695,14 +696,16 @@ static int process_type(struct state *state, struct die *parent, Dwarf_Die *die)
>  static int process_subprogram(struct state *state, Dwarf_Die *die)
>  {
>  	check(__process_subroutine_type(state, NULL, die, "subprogram"));
> -	return check(process(state, NULL, ";\n"));
> +	state->sym->state = MAPPED;
> +	return 0;
>  }
>  
>  static int process_variable(struct state *state, Dwarf_Die *die)
>  {
>  	check(process(state, NULL, "variable "));
>  	check(process_type_attr(state, NULL, die));
> -	return check(process(state, NULL, ";\n"));
> +	state->sym->state = MAPPED;
> +	return 0;
>  }
>  
>  static int process_symbol_ptr(struct state *state, Dwarf_Die *die)
> @@ -757,6 +760,9 @@ static int process_exported_symbols(struct state *state, struct die *cache,
>  		else
>  			check(process_variable(state, &state->die));
>  
> +		if (dump_dies)
> +			fputs("\n", stderr);
> +
>  		cache_clear_expanded(&state->expansion_cache);
>  		return 0;
>  	default:
> diff --git a/scripts/gendwarfksyms/gendwarfksyms.c b/scripts/gendwarfksyms/gendwarfksyms.c
> index 1349e592783b..6a219a54c342 100644
> --- a/scripts/gendwarfksyms/gendwarfksyms.c
> +++ b/scripts/gendwarfksyms/gendwarfksyms.c
> @@ -20,6 +20,11 @@ bool debug;
>  bool dump_dies;
>  /* Print out inline debugging information about die_map changes */
>  bool dump_die_map;
> +/* Print out type_map contents */
> +bool dump_types;
> +/* Produce a symtypes file */
> +bool symtypes;
> +static const char *symtypes_file;
>  
>  static const struct {
>  	const char *arg;
> @@ -29,6 +34,8 @@ static const struct {
>  	{ "--debug", &debug, NULL },
>  	{ "--dump-dies", &dump_dies, NULL },
>  	{ "--dump-die-map", &dump_die_map, NULL },
> +	{ "--dump-types", &dump_types, NULL },
> +	{ "--symtypes", &symtypes, &symtypes_file },
>  };
>  
>  static int usage(void)
> @@ -79,6 +86,7 @@ static int process_modules(Dwfl_Module *mod, void **userdata, const char *name,
>  	Dwarf_Die cudie;
>  	Dwarf_CU *cu = NULL;
>  	Dwarf *dbg;
> +	FILE *symfile = arg;
>  	int res;
>  
>  	debug("%s", name);
> @@ -100,6 +108,10 @@ static int process_modules(Dwfl_Module *mod, void **userdata, const char *name,
>  		check(process_module(mod, dbg, &cudie));
>  	} while (cu);
>  
> +	/*
> +	 * Use die_map to expand type strings and write them to `symfile`.
> +	 */
> +	check(generate_symtypes(symfile));
>  	die_map_free();
>  
>  	return DWARF_CB_OK;
> @@ -112,6 +124,7 @@ static const Dwfl_Callbacks callbacks = {
>  
>  int main(int argc, const char **argv)
>  {
> +	FILE *symfile = NULL;
>  	unsigned int n;
>  
>  	if (parse_options(argc, argv) < 0)
> @@ -122,6 +135,16 @@ int main(int argc, const char **argv)
>  
>  	check(symbol_read_exports(stdin));
>  
> +	if (symtypes_file) {
> +		symfile = fopen(symtypes_file, "w+");

It is sufficient to open the file for writing only.

> +
> +		if (!symfile) {
> +			error("fopen failed for '%s': %s", symtypes_file,
> +			      strerror(errno));
> +			return -1;
> +		}
> +	}
> +
>  	for (n = 0; n < object_count; n++) {
>  		Dwfl *dwfl;
>  		int fd;
> @@ -151,7 +174,7 @@ int main(int argc, const char **argv)
>  
>  		dwfl_report_end(dwfl, NULL, NULL);
>  
> -		if (dwfl_getmodules(dwfl, &process_modules, NULL, 0)) {
> +		if (dwfl_getmodules(dwfl, &process_modules, symfile, 0)) {
>  			error("dwfl_getmodules failed for '%s'",
>  			      object_files[n]);
>  			return -1;
> @@ -161,5 +184,8 @@ int main(int argc, const char **argv)
>  		close(fd);
>  	}
>  
> +	if (symfile)
> +		fclose(symfile);
> +
>  	return 0;
>  }

The fclose() call should be wrapped in check() to catch the case where
flushing the stream fails.

> diff --git a/scripts/gendwarfksyms/gendwarfksyms.h b/scripts/gendwarfksyms/gendwarfksyms.h
> index 7cd907e3d5e3..6edbd6478e0f 100644
> --- a/scripts/gendwarfksyms/gendwarfksyms.h
> +++ b/scripts/gendwarfksyms/gendwarfksyms.h
> @@ -22,6 +22,8 @@
>  extern bool debug;
>  extern bool dump_dies;
>  extern bool dump_die_map;
> +extern bool dump_types;
> +extern bool symtypes;
>  
>  #define MAX_INPUT_FILES 128
>  
> @@ -89,6 +91,12 @@ extern bool dump_die_map;
>  #define SYMBOL_PTR_PREFIX "__gendwarfksyms_ptr_"
>  #define SYMBOL_PTR_PREFIX_LEN (sizeof(SYMBOL_PTR_PREFIX) - 1)
>  
> +/* See dwarf.c:is_declaration */
> +#define SYMBOL_DECLONLY_PREFIX "__gendwarfksyms_declonly_"
> +#define SYMBOL_DECLONLY_PREFIX_LEN (sizeof(SYMBOL_DECLONLY_PREFIX) - 1)

Nit: These defines should go into the patch 15/19 "gendwarfksyms: Add
support for declaration-only data structures".

> +
> +enum symbol_state { UNPROCESSED, MAPPED };
> +
>  struct symbol_addr {
>  	uint32_t section;
>  	Elf64_Addr address;
> @@ -109,12 +117,14 @@ struct symbol {
>  	struct symbol_addr addr;
>  	struct hlist_node addr_hash;
>  	struct hlist_node name_hash;
> +	enum symbol_state state;
> +	uintptr_t die_addr;
>  };
>  
>  extern bool is_symbol_ptr(const char *name);
>  extern int symbol_read_exports(FILE *file);
>  extern int symbol_read_symtab(int fd);
> -extern struct symbol *symbol_get(const char *name);
> +extern struct symbol *symbol_get_unprocessed(const char *name);
>  
>  /*
>   * die.c
> @@ -157,12 +167,15 @@ struct die {
>  	struct hlist_node hash;
>  };
>  
> +typedef int (*die_map_callback_t)(struct die *, void *arg);
> +
>  extern int __die_map_get(uintptr_t addr, enum die_state state,
>  			 struct die **res);
>  extern int die_map_get(Dwarf_Die *die, enum die_state state, struct die **res);
>  extern int die_map_add_string(struct die *pd, const char *str);
>  extern int die_map_add_linebreak(struct die *pd, int linebreak);
>  extern int die_map_add_die(struct die *pd, struct die *child);
> +extern int die_map_for_each(die_map_callback_t func, void *arg);
>  extern void die_map_free(void);
>  
>  /*
> @@ -222,4 +235,10 @@ extern int process_die_container(struct state *state, struct die *cache,
>  
>  extern int process_module(Dwfl_Module *mod, Dwarf *dbg, Dwarf_Die *cudie);
>  
> +/*
> + * types.c
> + */
> +
> +extern int generate_symtypes(FILE *file);
> +
>  #endif /* __GENDWARFKSYMS_H */
> diff --git a/scripts/gendwarfksyms/symbols.c b/scripts/gendwarfksyms/symbols.c
> index d6d016458ae1..8cc04e6295a7 100644
> --- a/scripts/gendwarfksyms/symbols.c
> +++ b/scripts/gendwarfksyms/symbols.c
> @@ -117,6 +117,7 @@ int symbol_read_exports(FILE *file)
>  
>  		sym->name = name;
>  		sym->addr.section = SHN_UNDEF;
> +		sym->state = UNPROCESSED;
>  		name = NULL;
>  
>  		hash_add(symbol_names, &sym->name_hash, name_hash(sym->name));
> @@ -132,19 +133,21 @@ int symbol_read_exports(FILE *file)
>  	return 0;
>  }
>  
> -static int get_symbol(struct symbol *sym, void *arg)
> +static int get_unprocessed(struct symbol *sym, void *arg)
>  {
>  	struct symbol **res = arg;
>  
> -	*res = sym;
> +	if (sym->state == UNPROCESSED)
> +		*res = sym;
> +
>  	return 0;
>  }
>  
> -struct symbol *symbol_get(const char *name)
> +struct symbol *symbol_get_unprocessed(const char *name)
>  {
>  	struct symbol *sym = NULL;
>  
> -	for_each(name, false, get_symbol, &sym);
> +	for_each(name, false, get_unprocessed, &sym);
>  	return sym;
>  }
>  
> diff --git a/scripts/gendwarfksyms/types.c b/scripts/gendwarfksyms/types.c
> new file mode 100644
> index 000000000000..7b9997d8322d
> --- /dev/null
> +++ b/scripts/gendwarfksyms/types.c
> @@ -0,0 +1,439 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +/*
> + * Copyright (C) 2024 Google LLC
> + */
> +
> +#include "gendwarfksyms.h"
> +#include "crc32.h"
> +
> +static struct expansion_cache expansion_cache;
> +
> +/*
> + * A simple linked list of shared or owned strings to avoid copying strings
> + * around when not necessary.
> + */
> +struct type_list {
> +	const char *str;
> +	void *owned;
> +	struct type_list *next;
> +};
> +
> +static struct type_list *type_list_alloc(void)
> +{
> +	struct type_list *list;
> +
> +	list = calloc(1, sizeof(struct type_list));
> +	if (!list)
> +		error("calloc failed");
> +
> +	return list;
> +}
> +
> +static void type_list_free(struct type_list *list)
> +{
> +	struct type_list *tmp;
> +
> +	while (list) {
> +		if (list->owned)
> +			free(list->owned);
> +
> +		tmp = list;
> +		list = list->next;
> +		free(tmp);
> +	}
> +}
> +
> +static int type_list_append(struct type_list *list, const char *s, void *owned)
> +{
> +	if (!list || !s)
> +		return 0;
> +
> +	while (list->next)
> +		list = list->next;
> +
> +	if (list->str) {
> +		list->next = type_list_alloc();
> +
> +		if (!list->next) {
> +			error("type_list_alloc failed");
> +			return -1;
> +		}
> +
> +		list = list->next;
> +	}
> +
> +	list->str = s;
> +	list->owned = owned;
> +
> +	return strlen(list->str);
> +}
> +
> +static int type_list_write(struct type_list *list, FILE *file)
> +{
> +	while (list) {
> +		if (list->str)
> +			checkp(fputs(list->str, file));
> +		list = list->next;
> +	}
> +
> +	return 0;
> +}
> +
> +/*
> + * An expanded type string in symtypes format.
> + */
> +struct type_expansion {
> +	char *name;
> +	struct type_list *expanded;
> +	struct type_list *last;
> +	size_t len;
> +	struct hlist_node hash;
> +};

I found the manipulation of type_expansion.expanded and
type_expansion.last somewhat strange.

The list already starts with one element in type_expansion_init(),
apparently to make the last pointer valid. This element is empty,
however, and is only assigned on the first call to type_list_append().
Other elements are then added normally and are always assigned.

Perhaps consider using a regular list implementation, similarly to what
was discussed under the patch 06/19 "gendwarfksyms: Add a cache for
processed DIEs".

> +
> +static int type_expansion_init(struct type_expansion *type, bool alloc)
> +{
> +	memset(type, 0, sizeof(struct type_expansion));
> +	if (alloc) {
> +		type->expanded = type_list_alloc();
> +		if (!type->expanded)
> +			return -1;
> +
> +		type->last = type->expanded;
> +	}
> +	return 0;
> +}
> +
> +static inline void type_expansion_free(struct type_expansion *type)
> +{
> +	free(type->name);
> +	type_list_free(type->expanded);
> +	type_expansion_init(type, false);
> +}
> +
> +static int type_expansion_append(struct type_expansion *type, const char *s,
> +				 void *owned)
> +{
> +	type->len += checkp(type_list_append(type->last, s, owned));
> +
> +	if (type->last->next)
> +		type->last = type->last->next;
> +
> +	return 0;
> +}
> +
> +/*
> + * type_map -- the longest expansions for each type.
> + *
> + * const char *name -> struct type_expansion *
> + */
> +#define TYPE_HASH_BITS 16
> +static DEFINE_HASHTABLE(type_map, TYPE_HASH_BITS);
> +
> +static int type_map_get(const char *name, struct type_expansion **res)
> +{
> +	struct type_expansion *e;
> +
> +	hash_for_each_possible(type_map, e, hash, name_hash(name)) {
> +		if (!strcmp(name, e->name)) {
> +			*res = e;
> +			return 0;
> +		}
> +	}
> +
> +	return -1;
> +}
> +
> +static int type_map_add(const char *name, struct type_expansion *type)
> +{
> +	struct type_expansion *e;
> +
> +	if (type_map_get(name, &e)) {
> +		e = malloc(sizeof(struct type_expansion));
> +		if (!e) {
> +			error("malloc failed");
> +			return -1;
> +		}
> +
> +		type_expansion_init(e, false);
> +
> +		e->name = strdup(name);
> +		if (!e->name) {
> +			error("strdup failed");
> +			return -1;
> +		}
> +
> +		hash_add(type_map, &e->hash, name_hash(e->name));
> +
> +		if (dump_types)
> +			debug("adding %s", e->name);
> +	} else {
> +		/* Use the longest available expansion */
> +		if (type->len <= e->len)
> +			return 0;
> +
> +		type_list_free(e->expanded);
> +
> +		if (dump_types)
> +			debug("replacing %s", e->name);
> +	}
> +
> +	/* Take ownership of type->expanded */
> +	e->expanded = type->expanded;
> +	e->last = type->last;
> +	e->len = type->len;
> +	type->expanded = NULL;
> +	type->last = NULL;
> +	type->len = 0;
> +
> +	if (dump_types) {
> +		fputs(e->name, stderr);
> +		fputs(" ", stderr);
> +		type_list_write(e->expanded, stderr);
> +		fputs("\n", stderr);
> +	}
> +
> +	return 0;
> +}
> +
> +static int type_map_write(FILE *file)
> +{
> +	struct type_expansion *e;
> +	struct hlist_node *tmp;
> +	int i;
> +
> +	if (!file)
> +		return 0;
> +
> +	hash_for_each_safe(type_map, i, tmp, e, hash) {
> +		checkp(fputs(e->name, file));
> +		checkp(fputs(" ", file));
> +		type_list_write(e->expanded, file);
> +		checkp(fputs("\n", file));
> +	}
> +
> +	return 0;
> +}
> +
> +static void type_map_free(void)
> +{
> +	struct type_expansion *e;
> +	struct hlist_node *tmp;
> +	int i;
> +
> +	hash_for_each_safe(type_map, i, tmp, e, hash) {
> +		type_expansion_free(e);
> +		free(e);
> +	}
> +
> +	hash_init(type_map);
> +}
> +
> +/*
> + * Type reference format: <prefix>#<name>, where prefix:
> + * 	s -> structure
> + * 	u -> union
> + * 	e -> enum
> + * 	t -> typedef
> + *
> + * Names with spaces are additionally wrapped in single quotes.
> + */
> +static inline bool is_type_prefix(const char *s)
> +{
> +	return (s[0] == 's' || s[0] == 'u' || s[0] == 'e' || s[0] == 't') &&
> +	       s[1] == '#';
> +}
> +
> +static char get_type_prefix(int tag)
> +{
> +	switch (tag) {
> +	case DW_TAG_class_type:
> +	case DW_TAG_structure_type:
> +		return 's';
> +	case DW_TAG_union_type:
> +		return 'u';
> +	case DW_TAG_enumeration_type:
> +		return 'e';
> +	case DW_TAG_typedef_type:
> +		return 't';
> +	default:
> +		return 0;
> +	}
> +}
> +
> +static char *get_type_name(struct die *cache)
> +{
> +	const char *format;
> +	char prefix;
> +	char *name;
> +	size_t len;
> +
> +	if (cache->state == INCOMPLETE) {
> +		warn("found incomplete cache entry: %p", cache);
> +		return NULL;
> +	}
> +	if (!cache->fqn)
> +		return NULL;
> +
> +	prefix = get_type_prefix(cache->tag);
> +	if (!prefix)
> +		return NULL;
> +
> +	/* <prefix>#<type_name>\0 */
> +	len = 2 + strlen(cache->fqn) + 1;
> +
> +	/* Wrap names with spaces in single quotes */
> +	if (strstr(cache->fqn, " ")) {
> +		format = "%c#'%s'";
> +		len += 2;
> +	} else {
> +		format = "%c#%s";
> +	}
> +
> +	name = malloc(len);
> +	if (!name) {
> +		error("malloc failed");
> +		return NULL;
> +	}
> +
> +	if (snprintf(name, len, format, prefix, cache->fqn) >= len) {
> +		error("snprintf failed for '%s' (length %zu)", cache->fqn,
> +		      len);
> +		free(name);
> +		return NULL;
> +	}

This could be simplified considerably:

const char *quote = strstr(cache->fqn, " ") != NULL ? "'" : "";
if (asprintf(&name, "%c#%s%s%s", prefix, quote, cache->fqn, quote) < 0)
	[...]

> +
> +	return name;
> +}
> +
> +static int __type_expand(struct die *cache, struct type_expansion *type,
> +			 bool recursive);
> +
> +static int type_expand_child(struct die *cache, struct type_expansion *type,
> +			     bool recursive)
> +{
> +	struct type_expansion child;
> +	char *name;
> +
> +	name = get_type_name(cache);
> +	if (!name)
> +		return check(__type_expand(cache, type, recursive));
> +
> +	if (recursive && !__cache_was_expanded(&expansion_cache, cache->addr)) {
> +		check(__cache_mark_expanded(&expansion_cache, cache->addr));
> +		check(type_expansion_init(&child, true));
> +		check(__type_expand(cache, &child, true));
> +		check(type_map_add(name, &child));
> +		type_expansion_free(&child);
> +	}
> +
> +	check(type_expansion_append(type, name, name));
> +	return 0;
> +}
> +
> +static int __type_expand(struct die *cache, struct type_expansion *type,
> +			 bool recursive)
> +{
> +	struct die_fragment *df = cache->list;
> +	struct die *child;
> +
> +	while (df) {
> +		switch (df->type) {
> +		case STRING:
> +			check(type_expansion_append(type, df->data.str, NULL));
> +			break;
> +		case DIE:
> +			/* Use a complete die_map expansion if available */
> +			if (__die_map_get(df->data.addr, COMPLETE, &child) &&
> +			    __die_map_get(df->data.addr, UNEXPANDED, &child)) {
> +				error("unknown child: %" PRIxPTR,
> +				      df->data.addr);
> +				return -1;
> +			}
> +
> +			check(type_expand_child(child, type, recursive));
> +			break;
> +		case LINEBREAK:
> +			/*
> +			 * Keep whitespace in the symtypes format, but avoid
> +			 * repeated spaces.
> +			 */
> +			if (!df->next || df->next->type != LINEBREAK)
> +				check(type_expansion_append(type, " ", NULL));
> +			break;
> +		default:
> +			error("empty die_fragment in %p", cache);
> +			return -1;
> +		}
> +
> +		df = df->next;
> +	}
> +
> +	return 0;
> +}
> +
> +static int type_expand(struct die *cache, struct type_expansion *type,
> +		       bool recursive)
> +{
> +	check(type_expansion_init(type, true));
> +	check(__type_expand(cache, type, recursive));
> +	cache_clear_expanded(&expansion_cache);
> +	return 0;
> +}
> +
> +static int expand_type(struct die *cache, void *arg)
> +{
> +	struct type_expansion type;
> +	char *name;
> +
> +	/*
> +	 * Skip unexpanded die_map entries if there's a complete
> +	 * expansion available for this DIE.
> +	 */
> +	if (cache->state == UNEXPANDED)
> +		__die_map_get(cache->addr, COMPLETE, &cache);
> +
> +	if (cache->mapped)
> +		return 0;
> +
> +	cache->mapped = true;
> +
> +	name = get_type_name(cache);
> +	if (!name)
> +		return 0;
> +
> +	debug("%s", name);
> +	check(type_expand(cache, &type, true));
> +	check(type_map_add(name, &type));
> +
> +	type_expansion_free(&type);
> +	free(name);
> +
> +	return 0;
> +}
> +
> +int generate_symtypes(FILE *file)
> +{
> +	hash_init(expansion_cache.cache);
> +
> +	/*
> +	 * die_map processing:
> +	 *
> +	 *   1. die_map contains all types referenced in exported symbol
> +	 *      signatures, but can contain duplicates just like the original
> +	 *      DWARF, and some references may not be fully expanded depending
> +	 *      on how far we processed the DIE tree for that specific symbol.
> +	 *
> +	 *      For each die_map entry, find the longest available expansion,
> +	 *      and add it to type_map.
> +	 */
> +	check(die_map_for_each(expand_type, NULL));
> +
> +	/*
> +	 *   2. If a symtypes file is requested, write type_map contents to
> +	 *      the file.
> +	 */
> +	check(type_map_write(file));
> +	type_map_free();
> +
> +	return 0;
> +}
Sami Tolvanen Sept. 10, 2024, 9:15 p.m. UTC | #2
Hi Petr,

On Tue, Sep 10, 2024 at 7:58 AM Petr Pavlu <petr.pavlu@suse.com> wrote:
>
> On 8/15/24 19:39, Sami Tolvanen wrote:
> > @@ -122,6 +135,16 @@ int main(int argc, const char **argv)
> >
> >       check(symbol_read_exports(stdin));
> >
> > +     if (symtypes_file) {
> > +             symfile = fopen(symtypes_file, "w+");
>
> The file is sufficient to open only for writing.

True, I'll drop the +.

> > +     if (symfile)
> > +             fclose(symfile);
> > +
> >       return 0;
> >  }
>
> The fclose() call should be wrapped in check() to catch a situation when
> flushing the stream potentially failed.

Ack.

> > +/* See dwarf.c:is_declaration */
> > +#define SYMBOL_DECLONLY_PREFIX "__gendwarfksyms_declonly_"
> > +#define SYMBOL_DECLONLY_PREFIX_LEN (sizeof(SYMBOL_DECLONLY_PREFIX) - 1)
>
> Nit: These defines should go into the patch 15/19 "gendwarfksyms: Add
> support for declaration-only data structures".

Yeah, I noticed these too. Will fix in the next version.

> > +struct type_expansion {
> > +     char *name;
> > +     struct type_list *expanded;
> > +     struct type_list *last;
> > +     size_t len;
> > +     struct hlist_node hash;
> > +};
>
> I found the manipulation of type_expansion.expanded and
> type_expansion.last somewhat strange.
>
> The list starts already with one element in type_expansion_init(). This
> is apparently to make the last pointer valid. This element is however
> empty and gets only assigned on the first call to type_list_append().
> Other elements are then added normally, always assigned.
>
> Perhaps consider using a regular list implementation, similarly to what
> was discussed under the patch 06/19 "gendwarfksyms: Add a cache for
> processed DIEs".

Agreed, I'll switch this to a regular list in v3.

> > +     /* Wrap names with spaces in single quotes */
> > +     if (strstr(cache->fqn, " ")) {
> > +             format = "%c#'%s'";
> > +             len += 2;
> > +     } else {
> > +             format = "%c#%s";
> > +     }
> > +
> > +     name = malloc(len);
> > +     if (!name) {
> > +             error("malloc failed");
> > +             return NULL;
> > +     }
> > +
> > +     if (snprintf(name, len, format, prefix, cache->fqn) >= len) {
> > +             error("snprintf failed for '%s' (length %zu)", cache->fqn,
> > +                   len);
> > +             free(name);
> > +             return NULL;
> > +     }
>
> This could be quite simplified:
>
> const char *quote = strstr(cache->fqn, " ") != NULL ? "'" : "";
> if (asprintf(&name, "%c#%s%s%s", prefix, quote, cache->fqn, quote) < 0)
>         [...]

Good point, I'll change this too. Thanks for taking a look!

Sami
diff mbox series

Patch

diff --git a/scripts/gendwarfksyms/Makefile b/scripts/gendwarfksyms/Makefile
index 681b42441840..4866a2fd0e46 100644
--- a/scripts/gendwarfksyms/Makefile
+++ b/scripts/gendwarfksyms/Makefile
@@ -5,6 +5,7 @@  gendwarfksyms-objs += cache.o
 gendwarfksyms-objs += die.o
 gendwarfksyms-objs += dwarf.o
 gendwarfksyms-objs += symbols.o
+gendwarfksyms-objs += types.o
 
 HOST_EXTRACFLAGS := -I $(srctree)/tools/include
 HOSTLDLIBS_gendwarfksyms := -ldw -lelf
diff --git a/scripts/gendwarfksyms/die.c b/scripts/gendwarfksyms/die.c
index fdd52df88fdd..e40f04b70f7f 100644
--- a/scripts/gendwarfksyms/die.c
+++ b/scripts/gendwarfksyms/die.c
@@ -85,6 +85,19 @@  static void reset_die(struct die *cd)
 	cd->list = NULL;
 }
 
+int die_map_for_each(die_map_callback_t func, void *arg)
+{
+	struct die *cd;
+	struct hlist_node *tmp;
+	int i;
+
+	hash_for_each_safe(die_map, i, tmp, cd, hash) {
+		check(func(cd, arg));
+	}
+
+	return 0;
+}
+
 void die_map_free(void)
 {
 	struct hlist_node *tmp;
diff --git a/scripts/gendwarfksyms/dwarf.c b/scripts/gendwarfksyms/dwarf.c
index 9bca21a71639..62241cc97a76 100644
--- a/scripts/gendwarfksyms/dwarf.c
+++ b/scripts/gendwarfksyms/dwarf.c
@@ -60,11 +60,11 @@  static bool is_export_symbol(struct state *state, Dwarf_Die *die)
 	if (get_ref_die_attr(die, DW_AT_abstract_origin, &origin))
 		source = &origin;
 
-	state->sym = symbol_get(get_name(die));
+	state->sym = symbol_get_unprocessed(get_name(die));
 
 	/* Look up using the origin name if there are no matches. */
 	if (!state->sym && source != die)
-		state->sym = symbol_get(get_name(source));
+		state->sym = symbol_get_unprocessed(get_name(source));
 
 	state->die = *source;
 	return !!state->sym;
@@ -384,6 +384,7 @@  static int process_subroutine_type(struct state *state, struct die *cache,
 	return check(__process_subroutine_type(state, cache, die,
 					       "subroutine_type"));
 }
+
 static int process_variant_type(struct state *state, struct die *cache,
 				Dwarf_Die *die)
 {
@@ -695,14 +696,16 @@  static int process_type(struct state *state, struct die *parent, Dwarf_Die *die)
 static int process_subprogram(struct state *state, Dwarf_Die *die)
 {
 	check(__process_subroutine_type(state, NULL, die, "subprogram"));
-	return check(process(state, NULL, ";\n"));
+	state->sym->state = MAPPED;
+	return 0;
 }
 
 static int process_variable(struct state *state, Dwarf_Die *die)
 {
 	check(process(state, NULL, "variable "));
 	check(process_type_attr(state, NULL, die));
-	return check(process(state, NULL, ";\n"));
+	state->sym->state = MAPPED;
+	return 0;
 }
 
 static int process_symbol_ptr(struct state *state, Dwarf_Die *die)
@@ -757,6 +760,9 @@  static int process_exported_symbols(struct state *state, struct die *cache,
 		else
 			check(process_variable(state, &state->die));
 
+		if (dump_dies)
+			fputs("\n", stderr);
+
 		cache_clear_expanded(&state->expansion_cache);
 		return 0;
 	default:
diff --git a/scripts/gendwarfksyms/gendwarfksyms.c b/scripts/gendwarfksyms/gendwarfksyms.c
index 1349e592783b..6a219a54c342 100644
--- a/scripts/gendwarfksyms/gendwarfksyms.c
+++ b/scripts/gendwarfksyms/gendwarfksyms.c
@@ -20,6 +20,11 @@  bool debug;
 bool dump_dies;
 /* Print out inline debugging information about die_map changes */
 bool dump_die_map;
+/* Print out type_map contents */
+bool dump_types;
+/* Produce a symtypes file */
+bool symtypes;
+static const char *symtypes_file;
 
 static const struct {
 	const char *arg;
@@ -29,6 +34,8 @@  static const struct {
 	{ "--debug", &debug, NULL },
 	{ "--dump-dies", &dump_dies, NULL },
 	{ "--dump-die-map", &dump_die_map, NULL },
+	{ "--dump-types", &dump_types, NULL },
+	{ "--symtypes", &symtypes, &symtypes_file },
 };
 
 static int usage(void)
@@ -79,6 +86,7 @@  static int process_modules(Dwfl_Module *mod, void **userdata, const char *name,
 	Dwarf_Die cudie;
 	Dwarf_CU *cu = NULL;
 	Dwarf *dbg;
+	FILE *symfile = arg;
 	int res;
 
 	debug("%s", name);
@@ -100,6 +108,10 @@  static int process_modules(Dwfl_Module *mod, void **userdata, const char *name,
 		check(process_module(mod, dbg, &cudie));
 	} while (cu);
 
+	/*
+	 * Use die_map to expand type strings and write them to `symfile`.
+	 */
+	check(generate_symtypes(symfile));
 	die_map_free();
 
 	return DWARF_CB_OK;
@@ -112,6 +124,7 @@  static const Dwfl_Callbacks callbacks = {
 
 int main(int argc, const char **argv)
 {
+	FILE *symfile = NULL;
 	unsigned int n;
 
 	if (parse_options(argc, argv) < 0)
@@ -122,6 +135,16 @@  int main(int argc, const char **argv)
 
 	check(symbol_read_exports(stdin));
 
+	if (symtypes_file) {
+		symfile = fopen(symtypes_file, "w+");
+
+		if (!symfile) {
+			error("fopen failed for '%s': %s", symtypes_file,
+			      strerror(errno));
+			return -1;
+		}
+	}
+
 	for (n = 0; n < object_count; n++) {
 		Dwfl *dwfl;
 		int fd;
@@ -151,7 +174,7 @@  int main(int argc, const char **argv)
 
 		dwfl_report_end(dwfl, NULL, NULL);
 
-		if (dwfl_getmodules(dwfl, &process_modules, NULL, 0)) {
+		if (dwfl_getmodules(dwfl, &process_modules, symfile, 0)) {
 			error("dwfl_getmodules failed for '%s'",
 			      object_files[n]);
 			return -1;
@@ -161,5 +184,8 @@  int main(int argc, const char **argv)
 		close(fd);
 	}
 
+	if (symfile)
+		fclose(symfile);
+
 	return 0;
 }
diff --git a/scripts/gendwarfksyms/gendwarfksyms.h b/scripts/gendwarfksyms/gendwarfksyms.h
index 7cd907e3d5e3..6edbd6478e0f 100644
--- a/scripts/gendwarfksyms/gendwarfksyms.h
+++ b/scripts/gendwarfksyms/gendwarfksyms.h
@@ -22,6 +22,8 @@ 
 extern bool debug;
 extern bool dump_dies;
 extern bool dump_die_map;
+extern bool dump_types;
+extern bool symtypes;
 
 #define MAX_INPUT_FILES 128
 
@@ -89,6 +91,12 @@  extern bool dump_die_map;
 #define SYMBOL_PTR_PREFIX "__gendwarfksyms_ptr_"
 #define SYMBOL_PTR_PREFIX_LEN (sizeof(SYMBOL_PTR_PREFIX) - 1)
 
+/* See dwarf.c:is_declaration */
+#define SYMBOL_DECLONLY_PREFIX "__gendwarfksyms_declonly_"
+#define SYMBOL_DECLONLY_PREFIX_LEN (sizeof(SYMBOL_DECLONLY_PREFIX) - 1)
+
+enum symbol_state { UNPROCESSED, MAPPED };
+
 struct symbol_addr {
 	uint32_t section;
 	Elf64_Addr address;
@@ -109,12 +117,14 @@  struct symbol {
 	struct symbol_addr addr;
 	struct hlist_node addr_hash;
 	struct hlist_node name_hash;
+	enum symbol_state state;
+	uintptr_t die_addr;
 };
 
 extern bool is_symbol_ptr(const char *name);
 extern int symbol_read_exports(FILE *file);
 extern int symbol_read_symtab(int fd);
-extern struct symbol *symbol_get(const char *name);
+extern struct symbol *symbol_get_unprocessed(const char *name);
 
 /*
  * die.c
@@ -157,12 +167,15 @@  struct die {
 	struct hlist_node hash;
 };
 
+typedef int (*die_map_callback_t)(struct die *, void *arg);
+
 extern int __die_map_get(uintptr_t addr, enum die_state state,
 			 struct die **res);
 extern int die_map_get(Dwarf_Die *die, enum die_state state, struct die **res);
 extern int die_map_add_string(struct die *pd, const char *str);
 extern int die_map_add_linebreak(struct die *pd, int linebreak);
 extern int die_map_add_die(struct die *pd, struct die *child);
+extern int die_map_for_each(die_map_callback_t func, void *arg);
 extern void die_map_free(void);
 
 /*
@@ -222,4 +235,10 @@  extern int process_die_container(struct state *state, struct die *cache,
 
 extern int process_module(Dwfl_Module *mod, Dwarf *dbg, Dwarf_Die *cudie);
 
+/*
+ * types.c
+ */
+
+extern int generate_symtypes(FILE *file);
+
 #endif /* __GENDWARFKSYMS_H */
diff --git a/scripts/gendwarfksyms/symbols.c b/scripts/gendwarfksyms/symbols.c
index d6d016458ae1..8cc04e6295a7 100644
--- a/scripts/gendwarfksyms/symbols.c
+++ b/scripts/gendwarfksyms/symbols.c
@@ -117,6 +117,7 @@  int symbol_read_exports(FILE *file)
 
 		sym->name = name;
 		sym->addr.section = SHN_UNDEF;
+		sym->state = UNPROCESSED;
 		name = NULL;
 
 		hash_add(symbol_names, &sym->name_hash, name_hash(sym->name));
@@ -132,19 +133,21 @@  int symbol_read_exports(FILE *file)
 	return 0;
 }
 
-static int get_symbol(struct symbol *sym, void *arg)
+static int get_unprocessed(struct symbol *sym, void *arg)
 {
 	struct symbol **res = arg;
 
-	*res = sym;
+	if (sym->state == UNPROCESSED)
+		*res = sym;
+
 	return 0;
 }
 
-struct symbol *symbol_get(const char *name)
+struct symbol *symbol_get_unprocessed(const char *name)
 {
 	struct symbol *sym = NULL;
 
-	for_each(name, false, get_symbol, &sym);
+	for_each(name, false, get_unprocessed, &sym);
 	return sym;
 }
 
diff --git a/scripts/gendwarfksyms/types.c b/scripts/gendwarfksyms/types.c
new file mode 100644
index 000000000000..7b9997d8322d
--- /dev/null
+++ b/scripts/gendwarfksyms/types.c
@@ -0,0 +1,439 @@ 
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2024 Google LLC
+ */
+
+#include "gendwarfksyms.h"
+#include "crc32.h"
+
+static struct expansion_cache expansion_cache;
+
+/*
+ * A simple linked list of shared or owned strings to avoid copying strings
+ * around when not necessary.
+ *
+ * Each node carries at most one string. When 'owned' is set, that pointer
+ * is released together with the node by type_list_free(); when it is NULL
+ * the node only borrows 'str' (presumably the string must then outlive the
+ * list -- confirm against callers).
+ */
+struct type_list {
+	const char *str; /* fragment emitted by type_list_write(), NULL if unused */
+	void *owned; /* allocation freed with this node, NULL if borrowed */
+	struct type_list *next; /* following node, NULL at the tail */
+};
+
+/*
+ * Allocates a single zero-filled list node. Reports an error and returns
+ * NULL when the allocation fails.
+ */
+static struct type_list *type_list_alloc(void)
+{
+	struct type_list *node = calloc(1, sizeof(*node));
+
+	if (node)
+		return node;
+
+	error("calloc failed");
+	return NULL;
+}
+
+/*
+ * Releases every node of 'list' along with any string the node owns.
+ * Passing NULL is allowed and does nothing.
+ */
+static void type_list_free(struct type_list *list)
+{
+	struct type_list *next;
+
+	for (; list; list = next) {
+		next = list->next;
+		/* free(NULL) is a no-op, so borrowed nodes need no special case */
+		free(list->owned);
+		free(list);
+	}
+}
+
+/*
+ * Stores 's' at the end of 'list'. The tail node is reused if it does not
+ * hold a string yet; otherwise a new node is linked after it. 'owned' is
+ * recorded in the same node so type_list_free() can release it later.
+ *
+ * Returns the length of 's', 0 when 'list' or 's' is NULL (nothing is
+ * stored in that case), or -1 when a new node cannot be allocated.
+ */
+static int type_list_append(struct type_list *list, const char *s, void *owned)
+{
+	struct type_list *tail = list;
+
+	if (!tail || !s)
+		return 0;
+
+	/* Walk to the current end of the list */
+	for (; tail->next; tail = tail->next)
+		;
+
+	if (tail->str) {
+		struct type_list *node = type_list_alloc();
+
+		if (!node) {
+			error("type_list_alloc failed");
+			return -1;
+		}
+
+		tail->next = node;
+		tail = node;
+	}
+
+	tail->str = s;
+	tail->owned = owned;
+
+	return strlen(s);
+}
+
+/*
+ * Writes every string stored in 'list' to 'file', in list order. Nodes
+ * without a string are skipped. Returns 0 once the whole list is written;
+ * a failing fputs() is handled by checkp().
+ */
+static int type_list_write(struct type_list *list, FILE *file)
+{
+	for (; list; list = list->next) {
+		if (!list->str)
+			continue;
+
+		checkp(fputs(list->str, file));
+	}
+
+	return 0;
+}
+
+/*
+ * An expanded type string in symtypes format.
+ *
+ * The string is kept as a list of fragments rather than one buffer; 'len'
+ * is what type_map_add() compares to pick the longest expansion.
+ */
+struct type_expansion {
+	char *name; /* heap-allocated name, released by type_expansion_free() */
+	struct type_list *expanded; /* head of the fragment list */
+	struct type_list *last; /* node that type_expansion_append() appends after */
+	size_t len; /* sum of the lengths returned by type_list_append() */
+	struct hlist_node hash; /* linkage used when stored in type_map */
+};
+
+/*
+ * Resets 'type' to all zeroes. With 'alloc' set, an empty fragment list is
+ * also allocated so the expansion is ready for type_expansion_append().
+ *
+ * Returns 0 on success, or -1 if the list cannot be allocated.
+ */
+static int type_expansion_init(struct type_expansion *type, bool alloc)
+{
+	memset(type, 0, sizeof(*type));
+
+	if (!alloc)
+		return 0;
+
+	type->expanded = type_list_alloc();
+	if (!type->expanded)
+		return -1;
+
+	type->last = type->expanded;
+	return 0;
+}
+
+/*
+ * Releases the name and the fragment list held by 'type' and leaves the
+ * structure zeroed. The structure itself is not freed.
+ */
+static inline void type_expansion_free(struct type_expansion *type)
+{
+	type_list_free(type->expanded);
+	free(type->name);
+
+	/* Same state type_expansion_init(type, false) would leave behind */
+	memset(type, 0, sizeof(*type));
+}
+
+/*
+ * Appends 's' to the expansion and adds its length to type->len. 'owned'
+ * is handed to type_list_append() together with the string.
+ *
+ * Appending starts from type->last instead of the list head, and
+ * type->last is moved forward whenever a new node was linked in.
+ *
+ * Returns 0 on success; a failed append is handled by checkp().
+ */
+static int type_expansion_append(struct type_expansion *type, const char *s,
+				 void *owned)
+{
+	int added = checkp(type_list_append(type->last, s, owned));
+	struct type_list *tail;
+
+	type->len += added;
+
+	tail = type->last->next;
+	if (tail)
+		type->last = tail;
+
+	return 0;
+}
+
+/*
+ * type_map -- the longest expansions for each type.
+ *
+ * const char *name -> struct type_expansion *
+ *
+ * Entries are hashed with name_hash() on the expansion name, inserted or
+ * replaced by type_map_add(), and released by type_map_free().
+ */
+#define TYPE_HASH_BITS 16
+static DEFINE_HASHTABLE(type_map, TYPE_HASH_BITS);
+
+/*
+ * Looks up the type_map entry whose name equals 'name'.
+ *
+ * On a match, stores the entry in *res and returns 0. Returns -1 and leaves
+ * *res untouched when no entry matches.
+ */
+static int type_map_get(const char *name, struct type_expansion **res)
+{
+	struct type_expansion *entry;
+
+	hash_for_each_possible(type_map, entry, hash, name_hash(name)) {
+		/* Different names can share a bucket; compare the strings */
+		if (strcmp(name, entry->name) != 0)
+			continue;
+
+		*res = entry;
+		return 0;
+	}
+
+	return -1;
+}
+
+/*
+ * Records 'type' as the expansion of 'name' in type_map.
+ *
+ * A new entry is created when 'name' is not in the map yet. An existing
+ * entry is replaced only if 'type' is strictly longer than the stored
+ * expansion; otherwise the map and 'type' are both left unchanged.
+ *
+ * Whenever the expansion is stored, the fragment list moves from 'type' to
+ * the map entry and 'type' is left empty, so the caller can still call
+ * type_expansion_free() on it unconditionally.
+ *
+ * Returns 0 on success and -1 if memory for a new entry cannot be
+ * allocated.
+ */
+static int type_map_add(const char *name, struct type_expansion *type)
+{
+	struct type_expansion *e;
+
+	if (type_map_get(name, &e)) {
+		e = malloc(sizeof(*e));
+		if (!e) {
+			error("malloc failed");
+			return -1;
+		}
+
+		type_expansion_init(e, false);
+
+		e->name = strdup(name);
+		if (!e->name) {
+			error("strdup failed");
+			/* Not hashed yet, so nothing else would release it */
+			free(e);
+			return -1;
+		}
+
+		hash_add(type_map, &e->hash, name_hash(e->name));
+
+		if (dump_types)
+			debug("adding %s", e->name);
+	} else {
+		/* Use the longest available expansion */
+		if (type->len <= e->len)
+			return 0;
+
+		type_list_free(e->expanded);
+
+		if (dump_types)
+			debug("replacing %s", e->name);
+	}
+
+	/* Take ownership of type->expanded */
+	e->expanded = type->expanded;
+	e->last = type->last;
+	e->len = type->len;
+	type->expanded = NULL;
+	type->last = NULL;
+	type->len = 0;
+
+	if (dump_types) {
+		fputs(e->name, stderr);
+		fputs(" ", stderr);
+		type_list_write(e->expanded, stderr);
+		fputs("\n", stderr);
+	}
+
+	return 0;
+}
+
+/*
+ * Writes every type_map entry to 'file', one per line, as the entry name,
+ * a single space, and the expanded fragments. Entries are written in hash
+ * table iteration order.
+ *
+ * Does nothing and returns 0 when 'file' is NULL. Returns 0 after all
+ * entries are written; any failed write is handled by check()/checkp().
+ */
+static int type_map_write(FILE *file)
+{
+	struct type_expansion *e;
+	struct hlist_node *tmp;
+	int i;
+
+	if (!file)
+		return 0;
+
+	hash_for_each_safe(type_map, i, tmp, e, hash) {
+		checkp(fputs(e->name, file));
+		checkp(fputs(" ", file));
+		/* A failure inside the expansion must not go unnoticed either */
+		check(type_list_write(e->expanded, file));
+		checkp(fputs("\n", file));
+	}
+
+	return 0;
+}
+
+/*
+ * Releases every entry stored in type_map, including each entry's name and
+ * fragment list, and reinitializes the hash table.
+ */
+static void type_map_free(void)
+{
+	struct type_expansion *entry;
+	struct hlist_node *next;
+	int bucket;
+
+	hash_for_each_safe(type_map, bucket, next, entry, hash) {
+		free(entry->name);
+		type_list_free(entry->expanded);
+		free(entry);
+	}
+
+	hash_init(type_map);
+}
+
+/*
+ * Type reference format: <prefix>#<name>, where prefix:
+ *	s -> structure
+ *	u -> union
+ *	e -> enum
+ *	t -> typedef
+ *
+ * Names with spaces are additionally wrapped in single quotes.
+ *
+ * Returns true when 's' starts with one of the prefix letters above
+ * followed directly by '#'.
+ */
+static inline bool is_type_prefix(const char *s)
+{
+	switch (s[0]) {
+	case 's':
+	case 'u':
+	case 'e':
+	case 't':
+		return s[1] == '#';
+	default:
+		return false;
+	}
+}
+
+/*
+ * Maps a DWARF tag to the one-letter symtypes prefix used in type
+ * references. Class and structure types share 's'. Returns 0 for tags that
+ * have no prefix.
+ */
+static char get_type_prefix(int tag)
+{
+	if (tag == DW_TAG_class_type || tag == DW_TAG_structure_type)
+		return 's';
+	if (tag == DW_TAG_union_type)
+		return 'u';
+	if (tag == DW_TAG_enumeration_type)
+		return 'e';
+	if (tag == DW_TAG_typedef_type)
+		return 't';
+
+	return 0;
+}
+
+/*
+ * Builds the symtypes reference name for 'cache': "<prefix>#<fqn>", with
+ * the fqn wrapped in single quotes when it contains a space.
+ *
+ * Returns a heap-allocated string the caller must free, or NULL when the
+ * entry is incomplete (a warning is printed), has no fqn, has a tag without
+ * a type prefix, or when allocation or formatting fails.
+ */
+static char *get_type_name(struct die *cache)
+{
+	const char *format = "%c#%s";
+	size_t len;
+	char prefix;
+	char *name;
+	int res;
+
+	if (cache->state == INCOMPLETE) {
+		warn("found incomplete cache entry: %p", cache);
+		return NULL;
+	}
+	if (!cache->fqn)
+		return NULL;
+
+	prefix = get_type_prefix(cache->tag);
+	if (!prefix)
+		return NULL;
+
+	/* Room for "<prefix>#", the name itself and the terminating NUL */
+	len = strlen(cache->fqn) + 3;
+
+	/* Names with spaces also need room for a pair of single quotes */
+	if (strchr(cache->fqn, ' ')) {
+		format = "%c#'%s'";
+		len += 2;
+	}
+
+	name = malloc(len);
+	if (!name) {
+		error("malloc failed");
+		return NULL;
+	}
+
+	res = snprintf(name, len, format, prefix, cache->fqn);
+	if (res < 0 || (size_t)res >= len) {
+		error("snprintf failed for '%s' (length %zu)", cache->fqn,
+		      len);
+		free(name);
+		return NULL;
+	}
+
+	return name;
+}
+
+static int __type_expand(struct die *cache, struct type_expansion *type,
+			 bool recursive);
+
+/*
+ * Handles a child that has a symtypes name: optionally expands it into
+ * type_map, then appends a reference to it to 'type'.
+ *
+ * 'child' is scratch space supplied by the caller, which releases it after
+ * this function returns, whether it succeeded or not. 'name' is handed to
+ * the fragment list only by the final append, so the caller still owns it
+ * whenever a non-zero value is returned.
+ */
+static int type_expand_named_child(struct die *cache,
+				   struct type_expansion *type, bool recursive,
+				   char *name, struct type_expansion *child)
+{
+	if (recursive && !__cache_was_expanded(&expansion_cache, cache->addr)) {
+		check(__cache_mark_expanded(&expansion_cache, cache->addr));
+		check(type_expansion_init(child, true));
+		check(__type_expand(cache, child, true));
+		check(type_map_add(name, child));
+	}
+
+	check(type_expansion_append(type, name, name));
+	return 0;
+}
+
+/*
+ * Adds the child DIE 'cache' to the expansion 'type'.
+ *
+ * A child without a symtypes name (see get_type_name()) is expanded inline
+ * into 'type'. A named child is appended as a reference by name; with
+ * 'recursive' set, and unless expansion_cache already has its address
+ * marked, its own expansion is also generated and offered to type_map.
+ *
+ * Returns 0 on success. On failure the name and any partial child
+ * expansion are released before the error is passed on.
+ */
+static int type_expand_child(struct die *cache, struct type_expansion *type,
+			     bool recursive)
+{
+	struct type_expansion child;
+	char *name;
+	int res;
+
+	name = get_type_name(cache);
+	if (!name)
+		return check(__type_expand(cache, type, recursive));
+
+	/* Start from an empty expansion so it is always safe to free */
+	type_expansion_init(&child, false);
+
+	res = type_expand_named_child(cache, type, recursive, name, &child);
+	type_expansion_free(&child);
+
+	/* The list owns the name only after a successful append */
+	if (res)
+		free(name);
+
+	return res;
+}
+
+/*
+ * Walks the fragment list of 'cache' and appends its symtypes form to
+ * 'type': strings are appended as they are, child DIEs are passed to
+ * type_expand_child(), and line breaks are turned into spaces.
+ *
+ * Returns 0 on success and -1 when a referenced child is not in die_map or
+ * a fragment has an unexpected type.
+ */
+static int __type_expand(struct die *cache, struct type_expansion *type,
+			 bool recursive)
+{
+	struct die_fragment *df;
+	struct die *child;
+
+	for (df = cache->list; df; df = df->next) {
+		switch (df->type) {
+		case STRING:
+			check(type_expansion_append(type, df->data.str, NULL));
+			break;
+		case DIE:
+			/*
+			 * Prefer the complete die_map expansion and fall back
+			 * to the unexpanded entry only if there is none.
+			 */
+			if (__die_map_get(df->data.addr, COMPLETE, &child)) {
+				if (__die_map_get(df->data.addr, UNEXPANDED,
+						  &child)) {
+					error("unknown child: %" PRIxPTR,
+					      df->data.addr);
+					return -1;
+				}
+			}
+
+			check(type_expand_child(child, type, recursive));
+			break;
+		case LINEBREAK:
+			/*
+			 * A run of line breaks collapses into one space: only
+			 * the last fragment of the run emits it.
+			 */
+			if (df->next && df->next->type == LINEBREAK)
+				break;
+
+			check(type_expansion_append(type, " ", NULL));
+			break;
+		default:
+			error("empty die_fragment in %p", cache);
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Top-level entry point for expanding one die_map entry: initializes
+ * 'type' with a freshly allocated fragment list, fills it from 'cache',
+ * and then clears expansion_cache.
+ *
+ * Returns 0 on success. NOTE(review): check() presumably returns early on
+ * failure, in which case expansion_cache is not cleared and 'type' is left
+ * for the caller to release -- confirm against the macro definition.
+ */
+static int type_expand(struct die *cache, struct type_expansion *type,
+		       bool recursive)
+{
+	check(type_expansion_init(type, true));
+	check(__type_expand(cache, type, recursive));
+	/* Drop the expansion marks set by type_expand_child() during this expansion */
+	cache_clear_expanded(&expansion_cache);
+	return 0;
+}
+
+/* Expands 'cache' into 'type' and offers the result to type_map as 'name'. */
+static int expand_type_to_map(struct die *cache, const char *name,
+			      struct type_expansion *type)
+{
+	check(type_expand(cache, type, true));
+	check(type_map_add(name, type));
+	return 0;
+}
+
+/*
+ * die_map_for_each() callback: generates the expansion for one die_map
+ * entry and adds it to type_map. 'arg' is unused.
+ *
+ * Entries that were already handled (cache->mapped) or that have no
+ * symtypes name are skipped. Returns 0 on success or skip. The name and the
+ * temporary expansion are released on every path, including failures.
+ */
+static int expand_type(struct die *cache, void *arg)
+{
+	struct type_expansion type;
+	char *name;
+	int res;
+
+	/*
+	 * Skip unexpanded die_map entries if there's a complete
+	 * expansion available for this DIE.
+	 */
+	if (cache->state == UNEXPANDED)
+		__die_map_get(cache->addr, COMPLETE, &cache);
+
+	if (cache->mapped)
+		return 0;
+
+	cache->mapped = true;
+
+	name = get_type_name(cache);
+	if (!name)
+		return 0;
+
+	debug("%s", name);
+
+	/* Start from an empty expansion so it is always safe to free */
+	type_expansion_init(&type, false);
+
+	res = expand_type_to_map(cache, name, &type);
+
+	type_expansion_free(&type);
+	free(name);
+
+	return res;
+}
+
+/* Fills type_map from die_map and writes it out; see generate_symtypes(). */
+static int build_and_write_type_map(FILE *file)
+{
+	/*
+	 * die_map processing:
+	 *
+	 *   1. die_map contains all types referenced in exported symbol
+	 *      signatures, but can contain duplicates just like the original
+	 *      DWARF, and some references may not be fully expanded depending
+	 *      on how far we processed the DIE tree for that specific symbol.
+	 *
+	 *      For each die_map entry, find the longest available expansion,
+	 *      and add it to type_map.
+	 */
+	check(die_map_for_each(expand_type, NULL));
+
+	/*
+	 *   2. If a symtypes file is requested, write type_map contents to
+	 *      the file.
+	 */
+	check(type_map_write(file));
+
+	return 0;
+}
+
+/*
+ * Builds type_map from the current die_map contents and, when 'file' is
+ * not NULL, writes the resulting type definitions to it.
+ *
+ * type_map is emptied before returning, on failure as well as on success,
+ * so no entries are left behind for a later call.
+ *
+ * Returns 0 on success, or the error reported by the failing step.
+ */
+int generate_symtypes(FILE *file)
+{
+	int res;
+
+	hash_init(expansion_cache.cache);
+
+	res = build_and_write_type_map(file);
+	type_map_free();
+
+	return res;
+}