diff mbox series

[05/15] gendwarfksyms: Add a cache

Message ID 20240617175818.58219-22-samitolvanen@google.com (mailing list archive)
State New
Headers show
Series Implement MODVERSIONS for Rust | expand

Commit Message

Sami Tolvanen June 17, 2024, 5:58 p.m. UTC
Basic types in DWARF repeat frequently and traversing the DIEs using
libdw is relatively slow. Add a simple hashtable based cache for the
processed DIEs.

Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
---
 tools/gendwarfksyms/Build           |   1 +
 tools/gendwarfksyms/cache.c         | 147 ++++++++++++++++++++++++++++
 tools/gendwarfksyms/gendwarfksyms.c |   4 +
 tools/gendwarfksyms/gendwarfksyms.h |  37 ++++++-
 tools/gendwarfksyms/types.c         | 140 ++++++++++++++++++--------
 5 files changed, 286 insertions(+), 43 deletions(-)
 create mode 100644 tools/gendwarfksyms/cache.c
diff mbox series

Patch

diff --git a/tools/gendwarfksyms/Build b/tools/gendwarfksyms/Build
index 518642c9b9de..220a4aa9b380 100644
--- a/tools/gendwarfksyms/Build
+++ b/tools/gendwarfksyms/Build
@@ -1,4 +1,5 @@ 
 gendwarfksyms-y += gendwarfksyms.o
+gendwarfksyms-y += cache.o
 gendwarfksyms-y += crc32.o
 gendwarfksyms-y += symbols.o
 gendwarfksyms-y += types.o
diff --git a/tools/gendwarfksyms/cache.c b/tools/gendwarfksyms/cache.c
new file mode 100644
index 000000000000..9adda113e0b6
--- /dev/null
+++ b/tools/gendwarfksyms/cache.c
@@ -0,0 +1,147 @@ 
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2024 Google LLC
+ */
+
+#include <string.h>
+#include "gendwarfksyms.h"
+
+#define DIE_HASH_BITS 10
+
+/* die->addr -> struct cached_die */
+DEFINE_HASHTABLE(die_cache, DIE_HASH_BITS);
+
+static unsigned int cache_hits;
+static unsigned int cache_misses;
+
+static int create_die(Dwarf_Die *die, struct cached_die **res)
+{
+	struct cached_die *cd;
+
+	cd = malloc(sizeof(struct cached_die));
+	if (!cd) {
+		error("malloc failed");
+		return -1;
+	}
+
+	cd->state = INCOMPLETE;
+	cd->addr = (uintptr_t)die->addr;
+	cd->list = NULL;
+
+	hash_add(die_cache, &cd->hash, cd->addr);
+	*res = cd;
+	return 0;
+}
+
+int cache_get(Dwarf_Die *die, enum cached_die_state state,
+	      struct cached_die **res)
+{
+	struct cached_die *cd;
+	uintptr_t addr = (uintptr_t)die->addr;
+
+	hash_for_each_possible(die_cache, cd, hash, addr) {
+		if (cd->addr == addr && cd->state == state) {
+			*res = cd;
+			cache_hits++;
+			return 0;
+		}
+	}
+
+	cache_misses++;
+	return check(create_die(die, res));
+}
+
+static void reset_die(struct cached_die *cd)
+{
+	struct cached_item *tmp;
+	struct cached_item *ci = cd->list;
+
+	while (ci) {
+		if (ci->type == STRING)
+			free(ci->data.str);
+
+		tmp = ci->next;
+		free(ci);
+		ci = tmp;
+	}
+
+	cd->state = INCOMPLETE;
+	cd->list = NULL;
+}
+
+void cache_free(void)
+{
+	struct cached_die *cd;
+	struct hlist_node *tmp;
+	int i;
+
+	hash_for_each_safe(die_cache, i, tmp, cd, hash) {
+		reset_die(cd);
+		free(cd);
+	}
+	hash_init(die_cache);
+
+	if ((cache_hits + cache_misses > 0))
+		debug("cache: hits %u, misses %u (hit rate %.02f%%)",
+		      cache_hits, cache_misses,
+		      (100.0f * cache_hits) / (cache_hits + cache_misses));
+}
+
+static int append_item(struct cached_die *cd, struct cached_item **res)
+{
+	struct cached_item *prev;
+	struct cached_item *ci;
+
+	ci = malloc(sizeof(struct cached_item));
+	if (!ci) {
+		error("malloc failed");
+		return -1;
+	}
+
+	ci->type = EMPTY;
+	ci->next = NULL;
+
+	prev = cd->list;
+	while (prev && prev->next)
+		prev = prev->next;
+
+	if (prev)
+		prev->next = ci;
+	else
+		cd->list = ci;
+
+	*res = ci;
+	return 0;
+}
+
+int cache_add_string(struct cached_die *cd, const char *str)
+{
+	struct cached_item *ci;
+
+	if (!cd)
+		return 0;
+
+	check(append_item(cd, &ci));
+
+	ci->data.str = strdup(str);
+	if (!ci->data.str) {
+		error("strdup failed");
+		return -1;
+	}
+
+	ci->type = STRING;
+	return 0;
+}
+
+int cache_add_die(struct cached_die *cd, Dwarf_Die *die)
+{
+	struct cached_item *ci;
+
+	if (!cd)
+		return 0;
+
+	check(append_item(cd, &ci));
+	ci->data.addr = (uintptr_t)die->addr;
+	ci->type = DIE;
+	return 0;
+}
diff --git a/tools/gendwarfksyms/gendwarfksyms.c b/tools/gendwarfksyms/gendwarfksyms.c
index 7938b7440674..38ccaeb72426 100644
--- a/tools/gendwarfksyms/gendwarfksyms.c
+++ b/tools/gendwarfksyms/gendwarfksyms.c
@@ -15,12 +15,15 @@ 
 
 /* Print type descriptions and debugging output to stderr */
 bool debug;
+/* Don't use caching */
+bool no_cache;
 
 static const struct {
 	const char *arg;
 	bool *flag;
 } options[] = {
 	{ "--debug", &debug },
+	{ "--no-cache", &no_cache },
 };
 
 static int usage(void)
@@ -126,6 +129,7 @@  int main(int argc, const char **argv)
 
 	dwfl_end(dwfl);
 	symbol_print_versions();
+	cache_free();
 
 	return 0;
 }
diff --git a/tools/gendwarfksyms/gendwarfksyms.h b/tools/gendwarfksyms/gendwarfksyms.h
index 7440f1c73500..ea5a9fbda66f 100644
--- a/tools/gendwarfksyms/gendwarfksyms.h
+++ b/tools/gendwarfksyms/gendwarfksyms.h
@@ -18,6 +18,7 @@ 
  * Options -- in gendwarfksyms.c
  */
 extern bool debug;
+extern bool no_cache;
 
 /*
  * Output helpers
@@ -69,6 +70,35 @@  extern int symbol_read_list(FILE *file);
 extern struct symbol *symbol_get(uintptr_t addr, const char *name);
 extern void symbol_print_versions(void);
 
+/*
+ * cache.c
+ */
+enum cached_item_type { EMPTY, STRING, DIE };
+
+struct cached_item {
+	enum cached_item_type type;
+	union {
+		char *str;
+		uintptr_t addr;
+	} data;
+	struct cached_item *next;
+};
+
+enum cached_die_state { INCOMPLETE, COMPLETE };
+
+struct cached_die {
+	enum cached_die_state state;
+	uintptr_t addr;
+	struct cached_item *list;
+	struct hlist_node hash;
+};
+
+extern int cache_get(Dwarf_Die *die, enum cached_die_state state,
+		     struct cached_die **res);
+extern int cache_add_string(struct cached_die *pd, const char *str);
+extern int cache_add_die(struct cached_die *pd, Dwarf_Die *die);
+extern void cache_free(void);
+
 /*
  * types.c
  */
@@ -81,12 +111,13 @@  struct state {
 	unsigned long crc;
 };
 
-typedef int (*die_callback_t)(struct state *state, Dwarf_Die *die);
+typedef int (*die_callback_t)(struct state *state, struct cached_die *cache,
+			      Dwarf_Die *die);
 typedef bool (*die_match_callback_t)(Dwarf_Die *die);
 extern bool match_all(Dwarf_Die *die);
 
-extern int process_die_container(struct state *state, Dwarf_Die *die,
-				 die_callback_t func,
+extern int process_die_container(struct state *state, struct cached_die *cache,
+				 Dwarf_Die *die, die_callback_t func,
 				 die_match_callback_t match);
 
 extern int process_module(Dwfl_Module *mod, Dwarf *dbg, Dwarf_Die *cudie);
diff --git a/tools/gendwarfksyms/types.c b/tools/gendwarfksyms/types.c
index 7508d0fb8b80..78046c08be23 100644
--- a/tools/gendwarfksyms/types.c
+++ b/tools/gendwarfksyms/types.c
@@ -72,7 +72,7 @@  static Dwarf_Die *get_exported(struct state *state, Dwarf_Die *die)
 /*
  * Type string and CRC processing
  */
-static int process(struct state *state, const char *s)
+static int process(struct state *state, struct cached_die *cache, const char *s)
 {
 	s = s ?: "<null>";
 
@@ -80,12 +80,13 @@  static int process(struct state *state, const char *s)
 		fputs(s, stderr);
 
 	state->crc = partial_crc32(s, state->crc);
-	return 0;
+	return cache_add_string(cache, s);
 }
 
 #define MAX_FMT_BUFFER_SIZE 128
 
-static int process_fmt(struct state *state, const char *fmt, ...)
+static int process_fmt(struct state *state, struct cached_die *cache,
+		       const char *fmt, ...)
 {
 	char buf[MAX_FMT_BUFFER_SIZE];
 	va_list args;
@@ -98,7 +99,7 @@  static int process_fmt(struct state *state, const char *fmt, ...)
 		error("vsnprintf overflow: increase MAX_FMT_BUFFER_SIZE");
 		res = -1;
 	} else {
-		check(process(state, buf));
+		check(process(state, cache, buf));
 	}
 
 	va_end(args);
@@ -106,7 +107,8 @@  static int process_fmt(struct state *state, const char *fmt, ...)
 }
 
 /* Process a fully qualified name from DWARF scopes */
-static int process_fqn(struct state *state, Dwarf_Die *die)
+static int process_fqn(struct state *state, struct cached_die *cache,
+		       Dwarf_Die *die)
 {
 	Dwarf_Die *scopes = NULL;
 	const char *name;
@@ -117,7 +119,7 @@  static int process_fqn(struct state *state, Dwarf_Die *die)
 	if (!res) {
 		name = get_name(die);
 		name = name ?: "<unnamed>";
-		return check(process(state, name));
+		return check(process(state, cache, name));
 	}
 
 	for (i = res - 1; i >= 0; i--) {
@@ -126,25 +128,25 @@  static int process_fqn(struct state *state, Dwarf_Die *die)
 
 		name = get_name(&scopes[i]);
 		name = name ?: "<unnamed>";
-		check(process(state, name));
+		check(process(state, cache, name));
 		if (i > 0)
-			check(process(state, "::"));
+			check(process(state, cache, "::"));
 	}
 
 	free(scopes);
 	return 0;
 }
 
-#define DEFINE_PROCESS_UDATA_ATTRIBUTE(attribute)                         \
-	static int process_##attribute##_attr(struct state *state,        \
-					      Dwarf_Die *die)             \
-	{                                                                 \
-		Dwarf_Word value;                                         \
-		if (get_udata_attr(die, DW_AT_##attribute, &value))       \
-			check(process_fmt(state,                          \
-					  " " #attribute "(%" PRIu64 ")", \
-					  value));                        \
-		return 0;                                                 \
+#define DEFINE_PROCESS_UDATA_ATTRIBUTE(attribute)                              \
+	static int process_##attribute##_attr(                                 \
+		struct state *state, struct cached_die *cache, Dwarf_Die *die) \
+	{                                                                      \
+		Dwarf_Word value;                                              \
+		if (get_udata_attr(die, DW_AT_##attribute, &value))            \
+			check(process_fmt(state, cache,                        \
+					  " " #attribute "(%" PRIu64 ")",      \
+					  value));                             \
+		return 0;                                                      \
 	}
 
 DEFINE_PROCESS_UDATA_ATTRIBUTE(alignment)
@@ -155,8 +157,9 @@  bool match_all(Dwarf_Die *die)
 	return true;
 }
 
-int process_die_container(struct state *state, Dwarf_Die *die,
-			  die_callback_t func, die_match_callback_t match)
+int process_die_container(struct state *state, struct cached_die *cache,
+			  Dwarf_Die *die, die_callback_t func,
+			  die_match_callback_t match)
 {
 	Dwarf_Die current;
 	int res;
@@ -164,32 +167,65 @@  int process_die_container(struct state *state, Dwarf_Die *die,
 	res = check(dwarf_child(die, &current));
 	while (!res) {
 		if (match(&current))
-			check(func(state, &current));
+			check(func(state, cache, &current));
 		res = check(dwarf_siblingof(&current, &current));
 	}
 
 	return 0;
 }
 
-static int process_type(struct state *state, Dwarf_Die *die);
+static int process_type(struct state *state, struct cached_die *parent,
+			Dwarf_Die *die);
 
-static int process_type_attr(struct state *state, Dwarf_Die *die)
+static int process_type_attr(struct state *state, struct cached_die *cache,
+			     Dwarf_Die *die)
 {
 	Dwarf_Die type;
 
 	if (get_ref_die_attr(die, DW_AT_type, &type))
-		return check(process_type(state, &type));
+		return check(process_type(state, cache, &type));
 
 	/* Compilers can omit DW_AT_type -- print out 'void' to clarify */
-	return check(process(state, "base_type void"));
+	return check(process(state, cache, "base_type void"));
+}
+
+static int process_base_type(struct state *state, struct cached_die *cache,
+			     Dwarf_Die *die)
+{
+	check(process(state, cache, "base_type "));
+	check(process_fqn(state, cache, die));
+	check(process_byte_size_attr(state, cache, die));
+	return check(process_alignment_attr(state, cache, die));
 }
 
-static int process_base_type(struct state *state, Dwarf_Die *die)
+static int process_cached(struct state *state, struct cached_die *cache,
+			  Dwarf_Die *die)
 {
-	check(process(state, "base_type "));
-	check(process_fqn(state, die));
-	check(process_byte_size_attr(state, die));
-	return check(process_alignment_attr(state, die));
+	struct cached_item *ci = cache->list;
+	Dwarf_Die child;
+
+	while (ci) {
+		switch (ci->type) {
+		case STRING:
+			check(process(state, NULL, ci->data.str));
+			break;
+		case DIE:
+			if (!dwarf_die_addr_die(state->dbg,
+						(void *)ci->data.addr,
+						&child)) {
+				error("dwarf_die_addr_die failed");
+				return -1;
+			}
+			check(process_type(state, NULL, &child));
+			break;
+		default:
+			error("empty cached_item");
+			return -1;
+		}
+		ci = ci->next;
+	}
+
+	return 0;
 }
 
 static void state_init(struct state *state)
@@ -197,19 +233,41 @@  static void state_init(struct state *state)
 	state->crc = 0xffffffff;
 }
 
-static int process_type(struct state *state, Dwarf_Die *die)
+static int process_type(struct state *state, struct cached_die *parent,
+			Dwarf_Die *die)
 {
+	struct cached_die *cache = NULL;
 	int tag = dwarf_tag(die);
 
+	/*
+	 * If we have the DIE already cached, use it instead of walking
+	 * through DWARF.
+	 */
+	if (!no_cache) {
+		check(cache_get(die, COMPLETE, &cache));
+
+		if (cache->state == COMPLETE) {
+			check(process_cached(state, cache, die));
+			check(cache_add_die(parent, die));
+			return 0;
+		}
+	}
+
 	switch (tag) {
 	case DW_TAG_base_type:
-		check(process_base_type(state, die));
+		check(process_base_type(state, cache, die));
 		break;
 	default:
 		debug("unimplemented type: %x", tag);
 		break;
 	}
 
+	if (!no_cache) {
+		/* Update cache state and append to the parent (if any) */
+		cache->state = COMPLETE;
+		check(cache_add_die(parent, die));
+	}
+
 	return 0;
 }
 
@@ -218,17 +276,18 @@  static int process_type(struct state *state, Dwarf_Die *die)
  */
 static int process_subprogram(struct state *state, Dwarf_Die *die)
 {
-	return check(process(state, "subprogram;\n"));
+	return check(process(state, NULL, "subprogram;\n"));
 }
 
 static int process_variable(struct state *state, Dwarf_Die *die)
 {
-	check(process(state, "variable "));
-	check(process_type_attr(state, die));
-	return check(process(state, ";\n"));
+	check(process(state, NULL, "variable "));
+	check(process_type_attr(state, NULL, die));
+	return check(process(state, NULL, ";\n"));
 }
 
-static int process_exported_symbols(struct state *state, Dwarf_Die *die)
+static int process_exported_symbols(struct state *state,
+				    struct cached_die *cache, Dwarf_Die *die)
 {
 	int tag = dwarf_tag(die);
 
@@ -237,8 +296,9 @@  static int process_exported_symbols(struct state *state, Dwarf_Die *die)
 	case DW_TAG_namespace:
 	case DW_TAG_class_type:
 	case DW_TAG_structure_type:
-		return check(process_die_container(
-			state, die, process_exported_symbols, match_all));
+		return check(process_die_container(state, cache, die,
+						   process_exported_symbols,
+						   match_all));
 
 	/* Possible exported symbols */
 	case DW_TAG_subprogram:
@@ -271,5 +331,5 @@  int process_module(Dwfl_Module *mod, Dwarf *dbg, Dwarf_Die *cudie)
 	struct state state = { .mod = mod, .dbg = dbg };
 
 	return check(process_die_container(
-		&state, cudie, process_exported_symbols, match_all));
+		&state, NULL, cudie, process_exported_symbols, match_all));
 }