diff mbox series

[v9,6/8] kallsyms: distinguish text symbols fully using object file names

Message ID 20221027195754.100039-7-nick.alcock@oracle.com (mailing list archive)
State Not Applicable
Headers show
Series [v9,1/8] kbuild: bring back tristate.conf | expand

Commit Message

Nick Alcock Oct. 27, 2022, 7:57 p.m. UTC
The commits before this one allow you to distinguish identically-named
text symbols located in different built-in object files from each other;
but identically-named symbols can appear in any object files at all,
including object files that cannot be built as modules.

We already have nearly all the machinery to disambiguate these symbols
as well.  Since any given object file can contain at most one definition
of a given symbol, it suffices to name the object files containing any
symbol which is otherwise ambiguous.  (No others need be named, saving
a bunch of space).

We associate address ranges with object file names using a new
.kallsyms_objfiles section just like the previously-added
.kallsyms_modules section.

But that's not quite enough.  Even the object file name is ambiguous in
some cases: e.g. there are a lot of files named "core.o" in the kernel.
We could just store the full pathname for every object file, but this is
needlessly wasteful: doing this eats more than 50KiB in object file
names alone, and nearly all the content of every name is repeated for
many names.  But if we store the object file names in the same section
as the built-in module names, drop the .o, and store minimal path
suffixes, we can save almost all that space.  (For example, "core.o"
would be stored as "core" unless there are ambiguous symbols in two
different object files both named "core", in which case they'd be named
"sched/core" and "futex/core", etc, possibly re-extending to
"kernel/sched/core" if still ambiguous).

We do this by a repeated-rehashing process. First, we compute a hash
value for symbol\0modhash for every symbol (the modhash is ignored if
this is a built-in symbol).  Any two symbols with the same such hash are
identically-named: add the maximally-shortened (one-component,
.o-stripped) object file name for all such symbols, and rehash, this
time hashing symbol\0objname\0modhash.  Any two symbols which still have
the same hash are still ambiguous: lengthen the name given to one of the
symbols' object files and repeat.  Eventually, all the ambiguity will go
away.  (We do have to take care not to re-lengthen anything we already
lengthened in any given hashing round.)

This involves multiple sorting passes but the impact on compilation time
appears to be nearly zero, and the impact on space in the running kernel
is noticeable: only a few dozen names need lengthening, so we can
completely ignore the overhead from storing repeated path components
because there are hardly any of them.

But that's not all.  We can also do similar optimization tricks to what
was done with .kallsyms_modules, reusing module names and names of
already-emitted object files: so any given object file name only appears
once in the strtab, and can be cited by many address ranges and even by
module entries.

Put all this together and the net overhead of this in my testing is
about 3KiB of new object file names in the .kallsyms_mod_objnames table
and 6KiB for the .kallsyms_objfiles table (mostly zeroes: in future
maybe we can find a way to elide some of those, but 6KiB is small enough
that it's not worth taking too much effort).

No ambiguous textual symbols remain outside actual modules (which can
still contain identically-named symbols in different object files
because kallsyms doesn't run over them so none of these tables can be
built for them.  At least, it doesn't yet.)

Signed-off-by: Nick Alcock <nick.alcock@oracle.com>
Reviewed-by: Kris Van Hees <kris.van.hees@oracle.com>
---

Notes:
    v9: new.

 scripts/kallsyms.c | 559 +++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 546 insertions(+), 13 deletions(-)
diff mbox series

Patch

diff --git a/scripts/kallsyms.c b/scripts/kallsyms.c
index f89f569eb3c9..ffb69a8f6ff8 100644
--- a/scripts/kallsyms.c
+++ b/scripts/kallsyms.c
@@ -113,6 +113,9 @@  static unsigned int memhash(char *s, size_t len)
 	return hash;
 }
 
+/*
+ * Object file -> module and shortened object file name tracking machinery.
+ */
 #define OBJ2MOD_BITS 10
 #define OBJ2MOD_N (1 << OBJ2MOD_BITS)
 #define OBJ2MOD_MASK (OBJ2MOD_N - 1)
@@ -143,15 +146,40 @@  struct obj2mod_elem {
 	struct obj2mod_elem *mod2obj_next;
 };
 
+/*
+ * Shortened object file names.  These are only ever consulted after checking
+ * the obj2mod hashes: names that already exist in there are used directly from
+ * there (pointed to via the mod_xref field) rather than being re-emitted.
+ * Entries that do not exist there are added to the end of the mod_objnames
+ * list.
+ */
+struct obj2short_elem {
+	const char *obj;
+	char *desuffixed;	/* objname sans suffix */
+	const char *short_obj;	/* shortened at / and suffix */
+	int short_offset;	/* offset of short name in .kallsyms_mod_objnames */
+	int last_rehash;	/* used during disambiguate_hash_syms */
+
+	struct obj2mod_elem *mod_xref;
+	struct obj2short_elem *short_xref;
+	struct obj2short_elem *short_next;
+};
+
 /*
  * Map from object files to obj2mod entries (a unique mapping), and vice versa
  * (not unique, but entries for objfiles in more than one module in this hash
- * are ignored).
+ * are ignored); also map from object file names to shortened names for them
+ * (also unique: there is no point storing both longer and shorter forms of one
+ * name, so if a longer name is needed we consistently use it instead of the
+ * shorter form.)
+ *
+ * obj2short is populated very late, at disambiguate_syms time.
  */
 
 static struct obj2mod_elem *obj2mod[OBJ2MOD_N];
 static struct obj2mod_elem *mod2obj[OBJ2MOD_N];
-static size_t num_objfiles;
+static struct obj2short_elem *obj2short[OBJ2MOD_N];
+static size_t num_objfiles, num_shortnames;
 
 /*
  * An ordered list of address ranges and the objfile that occupies that range.
@@ -165,6 +193,9 @@  struct addrmap_entry {
 static struct addrmap_entry *addrmap;
 static int addrmap_num, addrmap_alloced;
 
+static void disambiguate_syms(void);
+static void optimize_objnames(void);
+
 static void obj2mod_init(void)
 {
 	memset(obj2mod, 0, sizeof(obj2mod));
@@ -182,6 +213,18 @@  static struct obj2mod_elem *obj2mod_get(const char *obj)
 	return NULL;
 }
 
+static struct obj2short_elem *obj2short_get(const char *obj)
+{
+	int i = strhash(obj) & OBJ2MOD_MASK;
+	struct obj2short_elem *elem;
+
+	for (elem = obj2short[i]; elem; elem = elem->short_next) {
+		if (strcmp(elem->obj, obj) == 0)
+			return elem;
+	}
+	return NULL;
+}
+
 /*
  * Note that a given object file is found in some module, interning it in the
  * obj2mod hash.  Should not be called more than once for any given (module,
@@ -254,6 +297,12 @@  static int qmodhash(const void *a, const void *b)
 	return 0;
 }
 
+static int qobj2short(const void *a, const void *b)
+{
+	return strcmp((*(struct obj2short_elem **)a)->short_obj,
+		      (*(struct obj2short_elem **)b)->short_obj);
+}
+
 /*
  * Associate all object files in obj2mod which refer to the same module with a
  * single obj2mod entry for emission, preferring to point into the module list
@@ -393,6 +442,336 @@  static void optimize_obj2mod(void)
 	fprintf(stderr, "kallsyms: out of memory optimizing module list\n");
 	exit(EXIT_FAILURE);
 }
+
+/*
+ * Associate all short-name entries in obj2short that refer to the same short
+ * name with a single entry for emission, either (preferably) a module that
+ * shares that name or (alternatively) the first obj2short entry referencing
+ * that name.
+ */
+static void optimize_objnames(void)
+{
+	size_t i;
+	size_t num_objnames = 0;
+	struct obj2short_elem *elem;
+	struct obj2short_elem **uniq;
+	struct obj2short_elem *last;
+
+	uniq = malloc(sizeof(struct obj2short_elem *) * num_shortnames);
+	if (uniq == NULL) {
+		fprintf(stderr, "kallsyms: out of memory optimizing object file name list\n");
+		exit(EXIT_FAILURE);
+	}
+
+	/*
+	 * Much like optimize_obj2mod, except there is no need to canonicalize
+	 * anything or handle multimodule entries, and we need to chase down
+	 * possible entries in mod2obj first (so as not to duplicate them in the
+	 * final kallsyms_mod_objnames strtab).
+	 */
+	for (i = 0; i < OBJ2MOD_N; i++)
+		for (elem = obj2short[i]; elem; elem = elem->short_next)
+			uniq[num_objnames++] = elem;
+
+	qsort(uniq, num_objnames, sizeof(struct obj2short_elem *), qobj2short);
+
+	for (i = 0, last = NULL; i < num_objnames; i++) {
+		int h = strhash(uniq[i]->short_obj) & OBJ2MOD_MASK;
+		struct obj2mod_elem *mod_elem;
+
+		for (mod_elem = mod2obj[h]; mod_elem;
+		     mod_elem = mod_elem->mod2obj_next) {
+			/*
+			 * mod_elem entries are only valid if they are for
+			 * single-module objfiles: see obj2mod_add
+			 */
+			if (mod_elem->nmods > 1)
+				continue;
+
+			if (strcmp(mod_elem->mods, uniq[i]->short_obj) != 0)
+				continue;
+			uniq[i]->mod_xref = mod_elem;
+			break;
+		}
+
+		/*
+		 * Only look for a short_xref match if we don't already have one
+		 * in mod_xref.  (This means that multiple objfiles with the
+		 * same short name that is also a module name all chain directly
+		 * to the module name via mod_xref, rather than going through a
+		 * chain of short_xrefs.)
+		 */
+		if (uniq[i]->mod_xref)
+			continue;
+
+		if (last != NULL && strcmp(last->short_obj,
+					   uniq[i]->short_obj) == 0) {
+			uniq[i]->short_xref = last;
+			continue;
+		}
+
+		last = uniq[i];
+	}
+
+	free(uniq);
+}
+
+/*
+ * Used inside disambiguate_syms to identify colliding symbols.  We spot this by
+ * hashing symbol\0modhash (or just the symbol name if this is in the core
+ * kernel) and seeing if that collides.  (This means we don't need to bother
+ * canonicalizing the module list, since optimize_obj2mod already did it for
+ * us.)
+ *
+ * If that collides, we try disambiguating by adding ever-longer pieces of the
+ * object file name before the modhash until we no longer collide.  The result
+ * of this repeated addition becomes the obj2short hashtab.
+ */
+struct sym_maybe_collides {
+	struct sym_entry *sym;
+	struct addrmap_entry *addr;
+	struct obj2short_elem *short_objname;
+	unsigned int symhash;
+};
+
+static int qsymhash(const void *a, const void *b)
+{
+	const struct sym_maybe_collides *el_a = a;
+	const struct sym_maybe_collides *el_b = b;
+	if (el_a->symhash < el_b->symhash)
+		return -1;
+	else if (el_a->symhash > el_b->symhash)
+		return 1;
+	return 0;
+}
+
+static int find_addrmap(const void *a, const void *b)
+{
+	const struct sym_entry *sym = a;
+	const struct addrmap_entry *map = b;
+
+	if (sym->addr < map->addr)
+		return -1;
+	else if (sym->addr >= map->end_addr)
+		return 1;
+	return 0;
+}
+
+/*
+ * Allocate or lengthen an object file name for a symbol that needs it.
+ */
+static int lengthen_short_name(struct sym_maybe_collides *sym, int hash_cycle)
+{
+	struct obj2short_elem *short_objname = obj2short_get(sym->addr->obj);
+
+	if (!short_objname) {
+		int i = strhash(sym->addr->obj) & OBJ2MOD_MASK;
+		char *p;
+
+		short_objname = malloc(sizeof(struct obj2short_elem));
+		if (short_objname == NULL)
+			goto oom;
+
+		/*
+		 * New symbol: try maximal shortening, which is just the object
+		 * file name (no directory) with the suffix removed (the suffix
+		 * is useless for disambiguation since it is almost always .o).
+		 *
+		 * Add a bit of paranoia to allow for names starting with /,
+		 * ending with ., and names with no suffix.  (At least two of
+		 * these are most unlikely, but possible.)
+		 */
+
+		memset(short_objname, 0, sizeof(struct obj2short_elem));
+		short_objname->obj = sym->addr->obj;
+
+		p = strrchr(sym->addr->obj, '.');
+		if (p)
+			short_objname->desuffixed = strndup(sym->addr->obj,
+							    p - sym->addr->obj);
+		else
+			short_objname->desuffixed = strdup(sym->addr->obj);
+
+		if (short_objname->desuffixed == NULL)
+			goto oom;
+
+		p = strrchr(short_objname->desuffixed, '/');
+		if (p && p[1] != 0)
+			short_objname->short_obj = p + 1;
+		else
+			short_objname->short_obj = short_objname->desuffixed;
+
+		short_objname->short_next = obj2short[i];
+		short_objname->last_rehash = hash_cycle;
+		obj2short[i] = short_objname;
+
+		num_shortnames++;
+		return 1;
+	}
+
+	/*
+	 * Objname already lengthened by a previous symbol clash: do nothing
+	 * until we rehash again.
+	 */
+	if (short_objname->last_rehash == hash_cycle)
+		return 0;
+	short_objname->last_rehash = hash_cycle;
+
+	/*
+	 * Existing symbol: lengthen the objname we already have.
+	 */
+
+	if (short_objname->desuffixed == short_objname->short_obj) {
+		fprintf(stderr, "Cannot disambiguate %s: objname %s is "
+			"max-length but still colliding\n",
+			sym->sym->sym, short_objname->short_obj);
+		return 0;
+	}
+
+	/*
+	 * Allow for absolute paths, where the first byte is '/'.
+	 */
+
+	if (short_objname->desuffixed >= short_objname->short_obj - 2)
+		short_objname->short_obj = short_objname->desuffixed;
+	else {
+		for (short_objname->short_obj -= 2;
+		     short_objname->short_obj > short_objname->desuffixed &&
+		     *short_objname->short_obj != '/';
+		     short_objname->short_obj--);
+
+		if (*short_objname->short_obj == '/')
+			short_objname->short_obj++;
+	}
+	return 1;
+ oom:
+	fprintf(stderr, "Out of memory disambiguating syms\n");
+	exit(EXIT_FAILURE);
+}
+
+/*
+ * Do one round of disambiguation-check symbol hashing, factoring in the current
+ * set of applicable shortened object file names for those symbols that need
+ * them.
+ */
+static void disambiguate_hash_syms(struct sym_maybe_collides *syms)
+{
+	size_t i;
+	for (i = 0; i < table_cnt; i++) {
+		struct obj2short_elem *short_objname = NULL;
+		char *tmp, *p;
+		size_t tmp_size;
+
+		if (syms[i].sym == NULL) {
+			syms[i].symhash = 0;
+			continue;
+		}
+
+		short_objname = obj2short_get(syms[i].addr->obj);
+
+		tmp_size = strlen((char *) &(syms[i].sym->sym[1])) + 1;
+
+		if (short_objname)
+			tmp_size += strlen(short_objname->short_obj) + 1;
+
+		if (syms[i].addr->objfile)
+			tmp_size += sizeof(syms[i].addr->objfile->modhash);
+
+		tmp = malloc(tmp_size);
+		if (tmp == NULL) {
+			fprintf(stderr, "Out of memory disambiguating syms\n");
+			exit(EXIT_FAILURE);
+		}
+
+		p = stpcpy(tmp, (char *) &(syms[i].sym->sym[1]));
+		p++;
+		if (short_objname) {
+			p = stpcpy(p, short_objname->short_obj);
+			p++;
+		}
+		if (syms[i].addr->objfile)
+			memcpy(p, &(syms[i].addr->objfile->modhash),
+			       sizeof(syms[i].addr->objfile->modhash));
+
+		syms[i].symhash = memhash(tmp, tmp_size);
+		free(tmp);
+	}
+
+	qsort(syms, table_cnt, sizeof (struct sym_maybe_collides), qsymhash);
+}
+
+/*
+ * Figure out which object file names are necessary to disambiguate all symbols
+ * in the linked kernel: transform them for minimum length while retaining
+ * disambiguity: point to them in obj2short.
+ */
+static void disambiguate_syms(void)
+{
+	size_t i;
+	int retry;
+	int hash_cycle = 0;
+	unsigned int lasthash;
+	struct sym_maybe_collides *syms;
+
+	syms = calloc(table_cnt, sizeof(struct sym_maybe_collides));
+
+	if (syms == NULL)
+		goto oom;
+
+	/*
+	 * Initial table population: symbol-dependent things not affected by
+	 * disambiguation rounds.
+	 */
+	for (i = 0; i < table_cnt; i++) {
+		struct addrmap_entry *addr;
+
+		/*
+		 * Only bother doing anything for function symbols.
+		 */
+		if (table[i]->sym[0] != 't' && table[i]->sym[0] != 'T' &&
+		    table[i]->sym[0] != 'w' && table[i]->sym[0] != 'W')
+			continue;
+
+		addr = bsearch(table[i], addrmap, addrmap_num,
+			       sizeof(struct addrmap_entry), find_addrmap);
+
+		/*
+		 * Some function symbols (section start symbols, discarded
+		 * non-text-range symbols, etc) don't appear in the linker map
+		 * at all.
+		 */
+		if (addr == NULL)
+			continue;
+
+		syms[i].sym = table[i];
+		syms[i].addr = addr;
+	}
+
+	do {
+		hash_cycle++;
+		retry = 0;
+		lasthash = 0;
+		disambiguate_hash_syms(syms);
+
+		for (i = 0; i < table_cnt; i++) {
+			if (syms[i].sym == NULL)
+				continue;
+			if (syms[i].symhash == lasthash) {
+				if (lengthen_short_name(&syms[i], hash_cycle))
+					retry = 1;
+			}
+			lasthash = syms[i].symhash;
+		}
+	} while (retry);
+
+	free(syms);
+	return;
+ oom:
+	fprintf(stderr, "kallsyms: out of memory disambiguating syms\n");
+	exit(EXIT_FAILURE);
+
+}
+
 #endif /* CONFIG_KALLMODSYMS */
 
 static void usage(void)
@@ -424,6 +803,7 @@  static bool is_ignored_symbol(const char *name, char type)
 		"kallsyms_relative_base",
 		"kallsyms_num_syms",
 		"kallsyms_num_modules",
+		"kallsyms_num_objfiles",
 		"kallsyms_names",
 		"kallsyms_markers",
 		"kallsyms_token_table",
@@ -431,6 +811,7 @@  static bool is_ignored_symbol(const char *name, char type)
 		"kallsyms_module_offsets",
 		"kallsyms_module_addresses",
 		"kallsyms_modules",
+		"kallsyms_objfiles",
 		"kallsyms_mod_objnames",
 		"kallsyms_mod_objnames_len",
 		/* Exclude linker generated symbols which vary between passes */
@@ -700,6 +1081,7 @@  static void output_address(unsigned long long addr)
 static void output_kallmodsyms_mod_objnames(void)
 {
 	struct obj2mod_elem *elem;
+	struct obj2short_elem *short_elem;
 	size_t offset = 1;
 	size_t i;
 
@@ -755,15 +1137,75 @@  static void output_kallmodsyms_mod_objnames(void)
 			}
 		}
 	}
+
+	/*
+	 * Module names are done; now emit objfile names that don't match
+	 * objfile names.  They go in the same section to enable deduplication
+	 * between (maximally-shortened) objfile names and module names.
+	 * (This is another reason why objfile names drop the suffix.)
+	 */
+	for (i = 0; i < OBJ2MOD_N; i++) {
+		for (short_elem = obj2short[i]; short_elem;
+		     short_elem = short_elem->short_next) {
+
+			/* Already emitted? */
+			if (short_elem->mod_xref)
+				continue;
+
+			if (short_elem->short_xref)
+				short_elem = short_elem->short_xref;
+
+			if (short_elem->short_offset != 0)
+				continue;
+
+			printf("/* 0x%lx: shortened from %s */\n", offset,
+			       short_elem->obj);
+
+			short_elem->short_offset = offset;
+			printf("\t.asciz\t\"%s\"\n", short_elem->short_obj);
+			offset += strlen(short_elem->short_obj) + 1;
+		}
+	}
+
 	printf("\n");
 	output_label("kallsyms_mod_objnames_len");
 	printf("\t.long\t%zi\n", offset);
 }
 
+/*
+ * Return 1 if this address range cites the same built-in module and objfile
+ * name as the previous one.
+ */
+static int same_kallmodsyms_range(int i)
+{
+	struct obj2short_elem *last_short;
+	struct obj2short_elem *this_short;
+	if (i == 0)
+		return 0;
+
+	last_short = obj2short_get(addrmap[i-1].obj);
+	this_short = obj2short_get(addrmap[i].obj);
+
+	if (addrmap[i-1].objfile == addrmap[i].objfile) {
+
+		if ((last_short == NULL && this_short != NULL) ||
+		    (last_short != NULL && this_short == NULL))
+			return 0;
+
+		if (last_short == NULL && this_short == NULL)
+			return 1;
+
+		if (strcmp(last_short->short_obj, this_short->short_obj) == 0)
+			return 1;
+	}
+	return 0;
+}
+
 static void output_kallmodsyms_objfiles(void)
 {
 	size_t i = 0;
 	size_t emitted_offsets = 0;
+	size_t emitted_modules = 0;
 	size_t emitted_objfiles = 0;
 
 	if (base_relative)
@@ -775,12 +1217,15 @@  static void output_kallmodsyms_objfiles(void)
 		long long offset;
 		int overflow;
 
-                /*
-                 * Fuse consecutive address ranges citing the same object file
-                 * into one.
-                 */
-                if (i > 0 && addrmap[i-1].objfile == addrmap[i].objfile)
-                        continue;
+		printf("/* 0x%llx--0x%llx: %s */\n", addrmap[i].addr,
+		       addrmap[i].end_addr, addrmap[i].obj);
+
+		/*
+		 * Fuse consecutive address ranges citing the same built-in
+		 * module and objfile name into one.
+		 */
+		if (same_kallmodsyms_range(i))
+			continue;
 
 		if (base_relative) {
 			if (!absolute_percpu) {
@@ -807,11 +1252,12 @@  static void output_kallmodsyms_objfiles(void)
 
 	for (i = 0; i < addrmap_num; i++) {
 		struct obj2mod_elem *elem = addrmap[i].objfile;
+		struct obj2mod_elem *orig_elem = NULL;
 		int orig_nmods;
 		const char *orig_modname;
 		int mod_offset;
 
-		if (i > 0 && addrmap[i-1].objfile == addrmap[i].objfile)
+		if (same_kallmodsyms_range(i))
 			continue;
 
 		/*
@@ -819,8 +1265,10 @@  static void output_kallmodsyms_objfiles(void)
 		 * built-in module.
 		 */
 		if (addrmap[i].objfile == NULL) {
+			printf("/* 0x%llx--0x%llx: %s: built-in */\n",
+			       addrmap[i].addr, addrmap[i].end_addr, addrmap[i].obj);
 			printf("\t.long\t0x0\n");
-			emitted_objfiles++;
+			emitted_modules++;
 			continue;
 		}
 
@@ -835,8 +1283,10 @@  static void output_kallmodsyms_objfiles(void)
 		 * always points at the start of the xref target, so its offset
 		 * can be used as is.
 		 */
-		if (elem->xref)
+		if (elem->xref) {
+			orig_elem = elem;
 			elem = elem->xref;
+		}
 
 		if (elem->nmods == 1 || orig_nmods > 1) {
 
@@ -872,6 +1322,19 @@  static void output_kallmodsyms_objfiles(void)
 			 * the multimodule entry.
 			 */
 			mod_offset += onemod - elem->mods + 2;
+
+			/*
+			 * If this was the result of an xref chase, store this
+			 * mod_offset in the original entry so we can just reuse
+			 * it if an objfile shares this name.
+			 */
+
+			printf("/* 0x%llx--0x%llx: %s: single-module ref to %s in multimodule at %x */\n",
+			       addrmap[i].addr, addrmap[i].end_addr,
+			       orig_elem->mods, onemod, elem->mod_offset);
+
+			if (orig_elem)
+				orig_elem->mod_offset = mod_offset;
 		}
 
 		/*
@@ -881,12 +1344,68 @@  static void output_kallmodsyms_objfiles(void)
 		assert(elem->mod_offset != 0);
 
 		printf("\t.long\t0x%x\n", mod_offset);
-		emitted_objfiles++;
+		emitted_modules++;
 	}
 
-	assert(emitted_offsets == emitted_objfiles);
+	assert(emitted_offsets == emitted_modules);
 	output_label("kallsyms_num_modules");
+	printf("\t.long\t%zi\n", emitted_modules);
+
+	output_label("kallsyms_objfiles");
+
+	for (i = 0; i < addrmap_num; i++) {
+		struct obj2short_elem *elem;
+		int mod_offset;
+
+		if (same_kallmodsyms_range(i))
+			continue;
+
+		/*
+		 * No corresponding objfile name: no disambiguation needed;
+		 * point at 0.
+		 */
+		elem = obj2short_get(addrmap[i].obj);
+
+		if (elem == NULL) {
+			printf("/* 0x%llx--0x%llx: %s: unambiguous */\n",
+			       addrmap[i].addr, addrmap[i].end_addr,
+			       addrmap[i].obj);
+			printf("\t.long\t0x0\n");
+			emitted_objfiles++;
+			continue;
+		}
+
+		/*
+		 * Maybe the name is also used for a module: if it is, it cannot
+		 * be a multimodule.
+		 */
+
+		if (elem->mod_xref) {
+			assert(elem->mod_xref->nmods == 1);
+			mod_offset = elem->mod_xref->mod_offset;
+			printf("/* 0x%llx--0x%llx: %s: shortened as %s, references module */\n",
+			       addrmap[i].addr, addrmap[i].end_addr,
+			       addrmap[i].obj, elem->short_obj);
+		} else {
+			/*
+			 * A name only used for objfiles.  Chase down xrefs to
+			 * reuse existing entries.
+			 */
+			if (elem->short_xref)
+				elem = elem->short_xref;
+
+			mod_offset = elem->short_offset;
+			printf("/* 0x%llx--0x%llx: %s: shortened as %s */\n",
+			       addrmap[i].addr, addrmap[i].end_addr,
+			       addrmap[i].obj, elem->short_obj);
+		}
+		printf("\t.long\t0x%x\n", mod_offset);
+		emitted_objfiles++;
+	}
+	assert(emitted_offsets == emitted_objfiles);
+	output_label("kallsyms_num_objfiles");
 	printf("\t.long\t%zi\n", emitted_objfiles);
+
 	printf("\n");
 }
 #endif /* CONFIG_KALLMODSYMS */
@@ -1430,6 +1949,20 @@  static void read_modules(const char *modules_builtin)
 	 * Read linker map.
 	 */
 	read_linker_map();
+
+	/*
+	 * Now the modules are sorted out and we know their address ranges, use
+	 * the modhashes computed in optimize_obj2mod to identify any symbols
+	 * that are still ambiguous and set up the minimal representation of
+	 * their objfile name to disambiguate them.
+	 */
+	disambiguate_syms();
+
+	/*
+	 * Now we have objfile names, optimize the objfile list.
+	 */
+	optimize_objnames();
+
 }
 #else
 static void read_modules(const char *unused) {}