diff mbox series

[v4,10/19] gendwarfksyms: Limit structure expansion

Message ID 20241008183823.36676-31-samitolvanen@google.com (mailing list archive)
State Handled Elsewhere
Headers show
Series Implement DWARF modversions | expand

Commit Message

Sami Tolvanen Oct. 8, 2024, 6:38 p.m. UTC
Expand each structure type only once per exported symbol. This
is necessary to support self-referential structures, which would
otherwise result in infinite recursion, but is still sufficient for
catching ABI changes.

For pointers, limit structure expansion after the first pointer
in the symbol type. This should be plenty for detecting ABI
differences, but it stops us from pulling in half the kernel for
types that contain pointers to large kernel data structures, like
task_struct, for example.

Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
Acked-by: Neal Gompa <neal@gompa.dev>
---
 scripts/gendwarfksyms/Makefile        |   1 +
 scripts/gendwarfksyms/cache.c         |  44 +++++++++++
 scripts/gendwarfksyms/dwarf.c         | 109 +++++++++++++++++++++++---
 scripts/gendwarfksyms/gendwarfksyms.h |  37 +++++++++
 4 files changed, 182 insertions(+), 9 deletions(-)
 create mode 100644 scripts/gendwarfksyms/cache.c

Comments

Petr Pavlu Oct. 16, 2024, 2:16 p.m. UTC | #1
On 10/8/24 20:38, Sami Tolvanen wrote:
> Expand each structure type only once per exported symbol. This
> is necessary to support self-referential structures, which would
> otherwise result in infinite recursion, but is still sufficient for
> catching ABI changes.
> 
> For pointers, limit structure expansion after the first pointer
> in the symbol type. This should be plenty for detecting ABI
> differences, but it stops us from pulling in half the kernel for
> types that contain pointers to large kernel data structures, like
> task_struct, for example.
> 
> Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
> Acked-by: Neal Gompa <neal@gompa.dev>
> ---
>  scripts/gendwarfksyms/Makefile        |   1 +
>  scripts/gendwarfksyms/cache.c         |  44 +++++++++++
>  scripts/gendwarfksyms/dwarf.c         | 109 +++++++++++++++++++++++---
>  scripts/gendwarfksyms/gendwarfksyms.h |  37 +++++++++
>  4 files changed, 182 insertions(+), 9 deletions(-)
>  create mode 100644 scripts/gendwarfksyms/cache.c
> 
> diff --git a/scripts/gendwarfksyms/Makefile b/scripts/gendwarfksyms/Makefile
> index c0d4ce50fc27..c06145d84df8 100644
> --- a/scripts/gendwarfksyms/Makefile
> +++ b/scripts/gendwarfksyms/Makefile
> @@ -2,6 +2,7 @@
>  hostprogs-always-y += gendwarfksyms
>  
>  gendwarfksyms-objs += gendwarfksyms.o
> +gendwarfksyms-objs += cache.o
>  gendwarfksyms-objs += die.o
>  gendwarfksyms-objs += dwarf.o
>  gendwarfksyms-objs += symbols.o
> diff --git a/scripts/gendwarfksyms/cache.c b/scripts/gendwarfksyms/cache.c
> new file mode 100644
> index 000000000000..2f1517133a20
> --- /dev/null
> +++ b/scripts/gendwarfksyms/cache.c
> @@ -0,0 +1,44 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (C) 2024 Google LLC
> + */
> +
> +#include "gendwarfksyms.h"
> +
> +struct expanded {
> +	uintptr_t addr;
> +	struct hlist_node hash;
> +};
> +
> +void __cache_mark_expanded(struct expansion_cache *ec, uintptr_t addr)
> +{
> +	struct expanded *es;
> +
> +	es = xmalloc(sizeof(struct expanded));
> +	es->addr = addr;
> +	hash_add(ec->cache, &es->hash, addr_hash(addr));
> +}
> +
> +bool __cache_was_expanded(struct expansion_cache *ec, uintptr_t addr)
> +{
> +	struct expanded *es;
> +
> +	hash_for_each_possible(ec->cache, es, hash, addr_hash(addr)) {
> +		if (es->addr == addr)
> +			return true;
> +	}
> +
> +	return false;
> +}
> +
> +void cache_clear_expanded(struct expansion_cache *ec)
> +{
> +	struct hlist_node *tmp;
> +	struct expanded *es;
> +
> +	hash_for_each_safe(ec->cache, es, tmp, hash) {
> +		free(es);
> +	}
> +
> +	hash_init(ec->cache);
> +}
> diff --git a/scripts/gendwarfksyms/dwarf.c b/scripts/gendwarfksyms/dwarf.c
> index f5cebbdcc212..51dd8e82f9e7 100644
> --- a/scripts/gendwarfksyms/dwarf.c
> +++ b/scripts/gendwarfksyms/dwarf.c
> @@ -26,6 +26,7 @@ static void process_linebreak(struct die *cache, int n)
>  		       !dwarf_form##attr(&da, value);                  \
>  	}
>  
> +DEFINE_GET_ATTR(flag, bool)
>  DEFINE_GET_ATTR(udata, Dwarf_Word)
>  
>  static bool get_ref_die_attr(Dwarf_Die *die, unsigned int id, Dwarf_Die *value)
> @@ -79,6 +80,13 @@ static bool match_export_symbol(struct state *state, Dwarf_Die *die)
>  	return !!state->sym;
>  }
>  
> +static bool is_declaration(Dwarf_Die *die)
> +{
> +	bool value;
> +
> +	return get_flag_attr(die, DW_AT_declaration, &value) && value;
> +}
> +
>  /*
>   * Type string processing
>   */
> @@ -455,19 +463,28 @@ static void __process_structure_type(struct state *state, struct die *cache,
>  				     die_callback_t process_func,
>  				     die_match_callback_t match_func)
>  {
> +	bool is_decl;
> +
>  	process(cache, type);
>  	process_fqn(cache, die);
>  	process(cache, " {");
>  	process_linebreak(cache, 1);
>  
> -	check(process_die_container(state, cache, die, process_func,
> -				    match_func));
> +	is_decl = is_declaration(die);
> +
> +	if (!is_decl && state->expand.expand) {
> +		cache_mark_expanded(&state->expansion_cache, die->addr);
> +		check(process_die_container(state, cache, die, process_func,
> +					    match_func));
> +	}
>  
>  	process_linebreak(cache, -1);
>  	process(cache, "}");
>  
> -	process_byte_size_attr(cache, die);
> -	process_alignment_attr(cache, die);
> +	if (!is_decl && state->expand.expand) {
> +		process_byte_size_attr(cache, die);
> +		process_alignment_attr(cache, die);
> +	}
>  }
>  
>  #define DEFINE_PROCESS_STRUCTURE_TYPE(structure)                        \
> @@ -520,7 +537,7 @@ static void process_unspecified_type(struct state *state, struct die *cache,
>  				     Dwarf_Die *die)
>  {
>  	/*
> -	 * These can be emitted for stand-elone assembly code, which means we
> +	 * These can be emitted for stand-alone assembly code, which means we
>  	 * might run into them in vmlinux.o.
>  	 */
>  	process(cache, "unspecified_type");

Nit: This hunk should be folded into patch #9 ("gendwarfksyms: Expand
structure types").

> @@ -552,6 +569,42 @@ static void process_cached(struct state *state, struct die *cache,
>  	}
>  }
>  
> +static void state_init(struct state *state)
> +{
> +	state->expand.expand = true;
> +	state->expand.ptr_depth = 0;
> +	state->expand.ptr_expansion_depth = 0;
> +	hash_init(state->expansion_cache.cache);
> +}
> +
> +static void expansion_state_restore(struct expansion_state *state,
> +				    struct expansion_state *saved)
> +{
> +	state->expand = saved->expand;
> +	state->ptr_depth = saved->ptr_depth;
> +	state->ptr_expansion_depth = saved->ptr_expansion_depth;
> +}
> +
> +static void expansion_state_save(struct expansion_state *state,
> +				 struct expansion_state *saved)
> +{
> +	expansion_state_restore(saved, state);
> +}
> +
> +static bool is_pointer_type(int tag)
> +{
> +	return tag == DW_TAG_pointer_type || tag == DW_TAG_reference_type;
> +}
> +
> +static bool is_expanded_type(int tag)
> +{
> +	return tag == DW_TAG_class_type || tag == DW_TAG_structure_type ||
> +	       tag == DW_TAG_union_type || tag == DW_TAG_enumeration_type;
> +}
> +
> +/* The maximum depth for expanding structures in pointers */
> +#define MAX_POINTER_EXPANSION_DEPTH 2
> +
>  #define PROCESS_TYPE(type)                                \
>  	case DW_TAG_##type##_type:                        \
>  		process_##type##_type(state, cache, die); \
> @@ -559,18 +612,52 @@ static void process_cached(struct state *state, struct die *cache,
>  
>  static int process_type(struct state *state, struct die *parent, Dwarf_Die *die)
>  {
> +	enum die_state want_state = DIE_COMPLETE;
>  	struct die *cache;
> +	struct expansion_state saved;
>  	int tag = dwarf_tag(die);
>  
> +	expansion_state_save(&state->expand, &saved);
> +
> +	/*
> +	 * Structures and enumeration types are expanded only once per
> +	 * exported symbol. This is sufficient for detecting ABI changes
> +	 * within the structure.
> +	 *
> +	 * We fully expand the first pointer reference in the exported
> +	 * symbol, but limit the expansion of further pointer references
> +	 * to at most MAX_POINTER_EXPANSION_DEPTH levels.
> +	 */
> +	if (is_pointer_type(tag))
> +		state->expand.ptr_depth++;
> +
> +	if (state->expand.ptr_depth > 0 && is_expanded_type(tag)) {
> +		if (state->expand.ptr_expansion_depth >=
> +			    MAX_POINTER_EXPANSION_DEPTH ||
> +		    cache_was_expanded(&state->expansion_cache, die->addr))
> +			state->expand.expand = false;
> +
> +		if (state->expand.expand)
> +			state->expand.ptr_expansion_depth++;
> +	}
> +
>  	/*
> -	 * If we have the DIE already cached, use it instead of walking
> +	 * If we have want_state already cached, use it instead of walking
>  	 * through DWARF.
>  	 */
> -	cache = die_map_get(die, DIE_COMPLETE);
> +	if (!state->expand.expand && is_expanded_type(tag))
> +		want_state = DIE_UNEXPANDED;
> +
> +	cache = die_map_get(die, want_state);
> +
> +	if (cache->state == want_state) {
> +		if (want_state == DIE_COMPLETE && is_expanded_type(tag))
> +			cache_mark_expanded(&state->expansion_cache, die->addr);
>  
> -	if (cache->state == DIE_COMPLETE) {
>  		process_cached(state, cache, die);
>  		die_map_add_die(parent, cache);
> +
> +		expansion_state_restore(&state->expand, &saved);
>  		return 0;
>  	}
>  

The commit message and the comment in process_type() describe that each
structure type should be expanded only once per symbol, but that doesn't
seem to be the case?

Consider the following example:

struct A { int m; };
void foo(struct A a1, struct A a2) {}

Running gendwarfksyms on the compiled file shows the following debug
output:

$ echo foo | ./build/scripts/gendwarfksyms/gendwarfksyms --debug --dump-types --dump-versions --dump-dies test.o
[...]
gendwarfksyms: process_symbol: foo
subprogram (
  formal_parameter structure_type A {
    member base_type int byte_size(4) encoding(5) m data_member_location(0)
  } byte_size(4) a1 ,
  formal_parameter structure_type A {
    member base_type int byte_size(4) encoding(5) m data_member_location(0)
  } byte_size(4) a2
)
-> base_type void

I think it indicates that struct A was expanded twice. My expectation
would be to see:

gendwarfksyms: process_symbol: foo
subprogram (
  formal_parameter structure_type A {
    member base_type int byte_size(4) encoding(5) m data_member_location(0)
  } byte_size(4) a1 ,
  formal_parameter structure_type A {
  } a2
)
-> base_type void

If I follow correctly, the type_expand() logic on the output eventually
performs only the first expansion for the CRC calculation. However, it
looks this process_type() code should do the same, if only to be a bit
faster. Or did I misunderstand anything how this should work?

I played with the following (applies on the top of the series), which
matches my understanding of what should happen:

diff --git a/scripts/gendwarfksyms/dwarf.c b/scripts/gendwarfksyms/dwarf.c
index 0112b5e8fbf5..1cc39be02b80 100644
--- a/scripts/gendwarfksyms/dwarf.c
+++ b/scripts/gendwarfksyms/dwarf.c
@@ -661,7 +661,6 @@ static void __process_structure_type(struct state *state, struct die *cache,
 	is_decl = is_declaration(cache, die);
 
 	if (!is_decl && state->expand.expand) {
-		cache_mark_expanded(&state->expansion_cache, die->addr);
 		state->expand.current_fqn = cache->fqn;
 		check(process_die_container(state, cache, die, process_func,
 					    match_func));
@@ -850,32 +849,35 @@ static int process_type(struct state *state, struct die *parent, Dwarf_Die *die)
 	if (is_pointer_type(tag))
 		state->expand.ptr_depth++;
 
-	if (state->expand.ptr_depth > 0 && is_expanded_type(tag)) {
-		if (state->expand.ptr_expansion_depth >=
-			    MAX_POINTER_EXPANSION_DEPTH ||
-		    cache_was_expanded(&state->expansion_cache, die->addr))
+	if (is_expanded_type(tag)) {
+		if (cache_was_expanded(&state->expansion_cache, die->addr))
 			state->expand.expand = false;
 
+		if (state->expand.ptr_depth > 0) {
+			if (state->expand.ptr_expansion_depth >=
+			    MAX_POINTER_EXPANSION_DEPTH)
+				state->expand.expand = false;
+
+			if (state->expand.expand)
+				state->expand.ptr_expansion_depth++;
+		}
+
 		if (state->expand.expand)
-			state->expand.ptr_expansion_depth++;
+			cache_mark_expanded(&state->expansion_cache, die->addr);
+		else
+			want_state = DIE_UNEXPANDED;
 	}
 
 	/*
 	 * If we have want_state already cached, use it instead of walking
 	 * through DWARF.
 	 */
-	if (!state->expand.expand && is_expanded_type(tag))
-		want_state = DIE_UNEXPANDED;
-
 	cache = die_map_get(die, want_state);
 
 	if (cache->state == want_state) {
 		die_debug_g("cached addr %p tag %x -- %s", die->addr, tag,
 			    die_state_name(cache->state));
 
-		if (want_state == DIE_COMPLETE && is_expanded_type(tag))
-			cache_mark_expanded(&state->expansion_cache, die->addr);
-
 		process_cached(state, cache, die);
 		die_map_add_die(parent, cache);
 

> @@ -611,9 +698,10 @@ static int process_type(struct state *state, struct die *parent, Dwarf_Die *die)
>  
>  	/* Update cache state and append to the parent (if any) */
>  	cache->tag = tag;
> -	cache->state = DIE_COMPLETE;
> +	cache->state = want_state;
>  	die_map_add_die(parent, cache);
>  
> +	expansion_state_restore(&state->expand, &saved);
>  	return 0;
>  }
>  
> @@ -675,11 +763,14 @@ static int process_exported_symbols(struct state *unused, struct die *cache,
>  		if (!match_export_symbol(&state, die))
>  			return 0;
>  
> +		state_init(&state);
> +
>  		if (tag == DW_TAG_subprogram)
>  			process_subprogram(&state, &state.die);
>  		else
>  			process_variable(&state, &state.die);
>  
> +		cache_clear_expanded(&state.expansion_cache);
>  		return 0;
>  	}
>  	default:
> diff --git a/scripts/gendwarfksyms/gendwarfksyms.h b/scripts/gendwarfksyms/gendwarfksyms.h
> index f317de5b0653..6147859ae2af 100644
> --- a/scripts/gendwarfksyms/gendwarfksyms.h
> +++ b/scripts/gendwarfksyms/gendwarfksyms.h
> @@ -104,6 +104,7 @@ struct symbol *symbol_get(const char *name);
>  
>  enum die_state {
>  	DIE_INCOMPLETE,
> +	DIE_UNEXPANDED,
>  	DIE_COMPLETE,
>  	DIE_LAST = DIE_COMPLETE
>  };
> @@ -133,6 +134,7 @@ static inline const char *die_state_name(enum die_state state)
>  {
>  	switch (state) {
>  	CASE_CONST_TO_STR(DIE_INCOMPLETE)
> +	CASE_CONST_TO_STR(DIE_UNEXPANDED)
>  	CASE_CONST_TO_STR(DIE_COMPLETE)
>  	}
>  
> @@ -155,9 +157,40 @@ void die_map_add_linebreak(struct die *pd, int linebreak);
>  void die_map_add_die(struct die *pd, struct die *child);
>  void die_map_free(void);
>  
> +/*
> + * cache.c
> + */
> +
> +#define EXPANSION_CACHE_HASH_BITS 11
> +
> +/* A cache for addresses we've already seen. */
> +struct expansion_cache {
> +	HASHTABLE_DECLARE(cache, 1 << EXPANSION_CACHE_HASH_BITS);
> +};
> +
> +void __cache_mark_expanded(struct expansion_cache *ec, uintptr_t addr);
> +bool __cache_was_expanded(struct expansion_cache *ec, uintptr_t addr);
> +
> +static inline void cache_mark_expanded(struct expansion_cache *ec, void *addr)
> +{
> +	__cache_mark_expanded(ec, (uintptr_t)addr);
> +}
> +
> +static inline bool cache_was_expanded(struct expansion_cache *ec, void *addr)
> +{
> +	return __cache_was_expanded(ec, (uintptr_t)addr);
> +}
> +
> +void cache_clear_expanded(struct expansion_cache *ec);
> +
>  /*
>   * dwarf.c
>   */
> +struct expansion_state {
> +	bool expand;
> +	unsigned int ptr_depth;
> +	unsigned int ptr_expansion_depth;
> +};
>  
>  struct state {
>  	struct symbol *sym;
> @@ -165,6 +198,10 @@ struct state {
>  
>  	/* List expansion */
>  	bool first_list_item;
> +
> +	/* Structure expansion */
> +	struct expansion_state expand;
> +	struct expansion_cache expansion_cache;
>  };
>  
>  typedef int (*die_callback_t)(struct state *state, struct die *cache,
Sami Tolvanen Oct. 16, 2024, 9:21 p.m. UTC | #2
Hi Petr,

On Wed, Oct 16, 2024 at 2:16 PM Petr Pavlu <petr.pavlu@suse.com> wrote:
>
> On 10/8/24 20:38, Sami Tolvanen wrote:
> >       /*
> > -      * These can be emitted for stand-elone assembly code, which means we
> > +      * These can be emitted for stand-alone assembly code, which means we
> >        * might run into them in vmlinux.o.
> >        */
> >       process(cache, "unspecified_type");
>
> Nit: This hunk should be folded into patch #9 ("gendwarfksyms: Expand
> structure types").

Oops, I'll fix this in the next version.

> The commit message and the comment in process_type() describe that each
> structure type should be expanded only once per symbol, but that doesn't
> seem to be the case?

Yup, it looks like I messed up the logic here. The ptr_depth check
should be later.

> If I follow correctly, the type_expand() logic on the output eventually
> performs only the first expansion for the CRC calculation. However, it
> looks this process_type() code should do the same, if only to be a bit
> faster. Or did I misunderstand anything how this should work?
>
> I played with the following (applies on the top of the series), which
> matches my understanding of what should happen:

This looks correct to me, and do I like the way you cleaned this up.
I'll apply this to v5. Thanks!

Sami
diff mbox series

Patch

diff --git a/scripts/gendwarfksyms/Makefile b/scripts/gendwarfksyms/Makefile
index c0d4ce50fc27..c06145d84df8 100644
--- a/scripts/gendwarfksyms/Makefile
+++ b/scripts/gendwarfksyms/Makefile
@@ -2,6 +2,7 @@ 
 hostprogs-always-y += gendwarfksyms
 
 gendwarfksyms-objs += gendwarfksyms.o
+gendwarfksyms-objs += cache.o
 gendwarfksyms-objs += die.o
 gendwarfksyms-objs += dwarf.o
 gendwarfksyms-objs += symbols.o
diff --git a/scripts/gendwarfksyms/cache.c b/scripts/gendwarfksyms/cache.c
new file mode 100644
index 000000000000..2f1517133a20
--- /dev/null
+++ b/scripts/gendwarfksyms/cache.c
@@ -0,0 +1,44 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2024 Google LLC
+ */
+
+#include "gendwarfksyms.h"
+
+struct expanded {
+	uintptr_t addr;
+	struct hlist_node hash;
+};
+
+void __cache_mark_expanded(struct expansion_cache *ec, uintptr_t addr)
+{
+	struct expanded *es;
+
+	es = xmalloc(sizeof(struct expanded));
+	es->addr = addr;
+	hash_add(ec->cache, &es->hash, addr_hash(addr));
+}
+
+bool __cache_was_expanded(struct expansion_cache *ec, uintptr_t addr)
+{
+	struct expanded *es;
+
+	hash_for_each_possible(ec->cache, es, hash, addr_hash(addr)) {
+		if (es->addr == addr)
+			return true;
+	}
+
+	return false;
+}
+
+void cache_clear_expanded(struct expansion_cache *ec)
+{
+	struct hlist_node *tmp;
+	struct expanded *es;
+
+	hash_for_each_safe(ec->cache, es, tmp, hash) {
+		free(es);
+	}
+
+	hash_init(ec->cache);
+}
diff --git a/scripts/gendwarfksyms/dwarf.c b/scripts/gendwarfksyms/dwarf.c
index f5cebbdcc212..51dd8e82f9e7 100644
--- a/scripts/gendwarfksyms/dwarf.c
+++ b/scripts/gendwarfksyms/dwarf.c
@@ -26,6 +26,7 @@  static void process_linebreak(struct die *cache, int n)
 		       !dwarf_form##attr(&da, value);                  \
 	}
 
+DEFINE_GET_ATTR(flag, bool)
 DEFINE_GET_ATTR(udata, Dwarf_Word)
 
 static bool get_ref_die_attr(Dwarf_Die *die, unsigned int id, Dwarf_Die *value)
@@ -79,6 +80,13 @@  static bool match_export_symbol(struct state *state, Dwarf_Die *die)
 	return !!state->sym;
 }
 
+static bool is_declaration(Dwarf_Die *die)
+{
+	bool value;
+
+	return get_flag_attr(die, DW_AT_declaration, &value) && value;
+}
+
 /*
  * Type string processing
  */
@@ -455,19 +463,28 @@  static void __process_structure_type(struct state *state, struct die *cache,
 				     die_callback_t process_func,
 				     die_match_callback_t match_func)
 {
+	bool is_decl;
+
 	process(cache, type);
 	process_fqn(cache, die);
 	process(cache, " {");
 	process_linebreak(cache, 1);
 
-	check(process_die_container(state, cache, die, process_func,
-				    match_func));
+	is_decl = is_declaration(die);
+
+	if (!is_decl && state->expand.expand) {
+		cache_mark_expanded(&state->expansion_cache, die->addr);
+		check(process_die_container(state, cache, die, process_func,
+					    match_func));
+	}
 
 	process_linebreak(cache, -1);
 	process(cache, "}");
 
-	process_byte_size_attr(cache, die);
-	process_alignment_attr(cache, die);
+	if (!is_decl && state->expand.expand) {
+		process_byte_size_attr(cache, die);
+		process_alignment_attr(cache, die);
+	}
 }
 
 #define DEFINE_PROCESS_STRUCTURE_TYPE(structure)                        \
@@ -520,7 +537,7 @@  static void process_unspecified_type(struct state *state, struct die *cache,
 				     Dwarf_Die *die)
 {
 	/*
-	 * These can be emitted for stand-elone assembly code, which means we
+	 * These can be emitted for stand-alone assembly code, which means we
 	 * might run into them in vmlinux.o.
 	 */
 	process(cache, "unspecified_type");
@@ -552,6 +569,42 @@  static void process_cached(struct state *state, struct die *cache,
 	}
 }
 
+static void state_init(struct state *state)
+{
+	state->expand.expand = true;
+	state->expand.ptr_depth = 0;
+	state->expand.ptr_expansion_depth = 0;
+	hash_init(state->expansion_cache.cache);
+}
+
+static void expansion_state_restore(struct expansion_state *state,
+				    struct expansion_state *saved)
+{
+	state->expand = saved->expand;
+	state->ptr_depth = saved->ptr_depth;
+	state->ptr_expansion_depth = saved->ptr_expansion_depth;
+}
+
+static void expansion_state_save(struct expansion_state *state,
+				 struct expansion_state *saved)
+{
+	expansion_state_restore(saved, state);
+}
+
+static bool is_pointer_type(int tag)
+{
+	return tag == DW_TAG_pointer_type || tag == DW_TAG_reference_type;
+}
+
+static bool is_expanded_type(int tag)
+{
+	return tag == DW_TAG_class_type || tag == DW_TAG_structure_type ||
+	       tag == DW_TAG_union_type || tag == DW_TAG_enumeration_type;
+}
+
+/* The maximum depth for expanding structures in pointers */
+#define MAX_POINTER_EXPANSION_DEPTH 2
+
 #define PROCESS_TYPE(type)                                \
 	case DW_TAG_##type##_type:                        \
 		process_##type##_type(state, cache, die); \
@@ -559,18 +612,52 @@  static void process_cached(struct state *state, struct die *cache,
 
 static int process_type(struct state *state, struct die *parent, Dwarf_Die *die)
 {
+	enum die_state want_state = DIE_COMPLETE;
 	struct die *cache;
+	struct expansion_state saved;
 	int tag = dwarf_tag(die);
 
+	expansion_state_save(&state->expand, &saved);
+
+	/*
+	 * Structures and enumeration types are expanded only once per
+	 * exported symbol. This is sufficient for detecting ABI changes
+	 * within the structure.
+	 *
+	 * We fully expand the first pointer reference in the exported
+	 * symbol, but limit the expansion of further pointer references
+	 * to at most MAX_POINTER_EXPANSION_DEPTH levels.
+	 */
+	if (is_pointer_type(tag))
+		state->expand.ptr_depth++;
+
+	if (state->expand.ptr_depth > 0 && is_expanded_type(tag)) {
+		if (state->expand.ptr_expansion_depth >=
+			    MAX_POINTER_EXPANSION_DEPTH ||
+		    cache_was_expanded(&state->expansion_cache, die->addr))
+			state->expand.expand = false;
+
+		if (state->expand.expand)
+			state->expand.ptr_expansion_depth++;
+	}
+
 	/*
-	 * If we have the DIE already cached, use it instead of walking
+	 * If we have want_state already cached, use it instead of walking
 	 * through DWARF.
 	 */
-	cache = die_map_get(die, DIE_COMPLETE);
+	if (!state->expand.expand && is_expanded_type(tag))
+		want_state = DIE_UNEXPANDED;
+
+	cache = die_map_get(die, want_state);
+
+	if (cache->state == want_state) {
+		if (want_state == DIE_COMPLETE && is_expanded_type(tag))
+			cache_mark_expanded(&state->expansion_cache, die->addr);
 
-	if (cache->state == DIE_COMPLETE) {
 		process_cached(state, cache, die);
 		die_map_add_die(parent, cache);
+
+		expansion_state_restore(&state->expand, &saved);
 		return 0;
 	}
 
@@ -611,9 +698,10 @@  static int process_type(struct state *state, struct die *parent, Dwarf_Die *die)
 
 	/* Update cache state and append to the parent (if any) */
 	cache->tag = tag;
-	cache->state = DIE_COMPLETE;
+	cache->state = want_state;
 	die_map_add_die(parent, cache);
 
+	expansion_state_restore(&state->expand, &saved);
 	return 0;
 }
 
@@ -675,11 +763,14 @@  static int process_exported_symbols(struct state *unused, struct die *cache,
 		if (!match_export_symbol(&state, die))
 			return 0;
 
+		state_init(&state);
+
 		if (tag == DW_TAG_subprogram)
 			process_subprogram(&state, &state.die);
 		else
 			process_variable(&state, &state.die);
 
+		cache_clear_expanded(&state.expansion_cache);
 		return 0;
 	}
 	default:
diff --git a/scripts/gendwarfksyms/gendwarfksyms.h b/scripts/gendwarfksyms/gendwarfksyms.h
index f317de5b0653..6147859ae2af 100644
--- a/scripts/gendwarfksyms/gendwarfksyms.h
+++ b/scripts/gendwarfksyms/gendwarfksyms.h
@@ -104,6 +104,7 @@  struct symbol *symbol_get(const char *name);
 
 enum die_state {
 	DIE_INCOMPLETE,
+	DIE_UNEXPANDED,
 	DIE_COMPLETE,
 	DIE_LAST = DIE_COMPLETE
 };
@@ -133,6 +134,7 @@  static inline const char *die_state_name(enum die_state state)
 {
 	switch (state) {
 	CASE_CONST_TO_STR(DIE_INCOMPLETE)
+	CASE_CONST_TO_STR(DIE_UNEXPANDED)
 	CASE_CONST_TO_STR(DIE_COMPLETE)
 	}
 
@@ -155,9 +157,40 @@  void die_map_add_linebreak(struct die *pd, int linebreak);
 void die_map_add_die(struct die *pd, struct die *child);
 void die_map_free(void);
 
+/*
+ * cache.c
+ */
+
+#define EXPANSION_CACHE_HASH_BITS 11
+
+/* A cache for addresses we've already seen. */
+struct expansion_cache {
+	HASHTABLE_DECLARE(cache, 1 << EXPANSION_CACHE_HASH_BITS);
+};
+
+void __cache_mark_expanded(struct expansion_cache *ec, uintptr_t addr);
+bool __cache_was_expanded(struct expansion_cache *ec, uintptr_t addr);
+
+static inline void cache_mark_expanded(struct expansion_cache *ec, void *addr)
+{
+	__cache_mark_expanded(ec, (uintptr_t)addr);
+}
+
+static inline bool cache_was_expanded(struct expansion_cache *ec, void *addr)
+{
+	return __cache_was_expanded(ec, (uintptr_t)addr);
+}
+
+void cache_clear_expanded(struct expansion_cache *ec);
+
 /*
  * dwarf.c
  */
+struct expansion_state {
+	bool expand;
+	unsigned int ptr_depth;
+	unsigned int ptr_expansion_depth;
+};
 
 struct state {
 	struct symbol *sym;
@@ -165,6 +198,10 @@  struct state {
 
 	/* List expansion */
 	bool first_list_item;
+
+	/* Structure expansion */
+	struct expansion_state expand;
+	struct expansion_cache expansion_cache;
 };
 
 typedef int (*die_callback_t)(struct state *state, struct die *cache,