diff mbox series

[v2,3/5] symbols: arrange to know where functions end

Message ID 10e116c5-5a62-4abc-a52a-e2ca12118cfe@suse.com (mailing list archive)
State New
Headers show
Series symbols: assorted adjustments | expand

Commit Message

Jan Beulich April 2, 2025, 1:58 p.m. UTC
When determining the symbol for a given address (e.g. for the %pS
logging format specifier), so far the size of a symbol (function) was
assumed to be everything until the next symbol. There may be gaps
though, which would better be recognizable in output (often suggesting
something odd is going on).

Insert "fake" end symbols in the address table, accompanied by zero-
length type/name entries (to keep lookup reasonably close to how it
was).

Note however that this, with present GNU binutils, won't work for
xen.efi: The linker loses function sizes (they're not part of a normal
symbol table entry), and hence nm has no way of reporting them.

The address table growth is quite significant on x86 release builds (due
to functions being aligned to 16-byte boundaries), though: Its size
almost doubles.

Requested-by: Andrew Cooper <andrew.cooper3@citrix.com>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
---
Note: Style-wise this is a horrible mix. I'm trying to match styles with
      what's used in the respective functions.

Older GNU ld retains section symbols, which nm then also lists. Should
we perhaps strip those as we read in nm's output? They don't provide any
useful extra information, as our linker scripts add section start
symbols anyway. (For the purposes here, luckily such section symbols are
at least emitted without size.)

Even for section start symbols there is the question of whether they
really need retaining (except perhaps when producing a map file). The
main question here likely is whether livepatch may have a need to look
them up by name. (Section end symbols may actually be slightly more
useful to keep, but that may also want considering more closely.)
---
v2: Deal with multiple symbols at the same address, but only some having
    a size specified.

Comments

Jan Beulich April 2, 2025, 2:08 p.m. UTC | #1
On 02.04.2025 15:58, Jan Beulich wrote:
> When determining the symbol for a given address (e.g. for the %pS
> logging format specifier), so far the size of a symbol (function) was
> assumed to be everything until the next symbol. There may be gaps
> though, which would better be recognizable in output (often suggesting
> something odd is going on).
> 
> Insert "fake" end symbols in the address table, accompanied by zero-
> length type/name entries (to keep lookup reasonably close to how it
> was).
> 
> Note however that this, with present GNU binutils, won't work for
> xen.efi: The linker loses function sizes (they're not part of a normal
> symbol table entry), and hence nm has no way of reporting them.

And, just for reference:
https://sourceware.org/pipermail/binutils/2025-March/140252.html

Jan
diff mbox series

Patch

--- a/xen/common/symbols.c
+++ b/xen/common/symbols.c
@@ -116,6 +116,13 @@  const char *symbols_lookup(unsigned long
         else high = mid;
     }
 
+    /* If we hit an END symbol, move to the previous (real) one. */
+    if (!symbols_names[get_symbol_offset(low)]) {
+        ASSERT(low);
+        symbol_end = symbols_address(low);
+        --low;
+    }
+
     /* search for the first aliased symbol. Aliased symbols are
            symbols with the same address */
     while (low && symbols_address(low - 1) == symbols_address(low))
@@ -124,11 +131,13 @@  const char *symbols_lookup(unsigned long
         /* Grab name */
     symbols_expand_symbol(get_symbol_offset(low), namebuf);
 
-    /* Search for next non-aliased symbol */
-    for (i = low + 1; i < symbols_num_addrs; i++) {
-        if (symbols_address(i) > symbols_address(low)) {
-            symbol_end = symbols_address(i);
-            break;
+    if (!symbol_end) {
+        /* Search for next non-aliased symbol */
+        for (i = low + 1; i < symbols_num_addrs; i++) {
+            if (symbols_address(i) > symbols_address(low)) {
+                symbol_end = symbols_address(i);
+                break;
+            }
         }
     }
 
@@ -170,6 +179,7 @@  int xensyms_read(uint32_t *symnum, char
         return -ERANGE;
     if ( *symnum == symbols_num_addrs )
     {
+    no_symbol:
         /* No more symbols */
         name[0] = '\0';
         return 0;
@@ -183,10 +193,31 @@  int xensyms_read(uint32_t *symnum, char
         /* Non-sequential access */
         next_offset = get_symbol_offset(*symnum);
 
+    /*
+     * If we're at an END symbol, skip to the next (real) one. This can
+     * happen if the caller ignores the *symnum output from an earlier
+     * iteration (Linux'es /proc/xen/xensyms handling does as of 6.14-rc).
+     */
+    if ( !symbols_names[next_offset] )
+    {
+        ++next_offset;
+        if ( ++*symnum == symbols_num_addrs )
+            goto no_symbol;
+    }
+
     *type = symbols_get_symbol_type(next_offset);
     next_offset = symbols_expand_symbol(next_offset, name);
     *address = symbols_address(*symnum);
 
+    /* If next one is an END symbol, skip it. */
+    if ( !symbols_names[next_offset] )
+    {
+        ++next_offset;
+        /* Make sure not to increment past symbols_num_addrs below. */
+        if ( *symnum + 1 < symbols_num_addrs )
+            ++*symnum;
+    }
+
     next_symbol = ++*symnum;
 
     spin_unlock(&symbols_mutex);
--- a/xen/tools/symbols.c
+++ b/xen/tools/symbols.c
@@ -38,6 +38,7 @@ 
 
 struct sym_entry {
 	unsigned long long addr;
+	unsigned long size;
 	unsigned int len;
 	unsigned char *sym;
 	char *orig_symbol;
@@ -87,6 +88,8 @@  static int read_symbol(FILE *in, struct
 	static char *filename;
 	int rc = -1;
 
+	s->size = 0;
+
 	switch (input_format) {
 	case fmt_bsd:
 		rc = fscanf(in, "%llx %c %499s\n", &s->addr, &stype, str);
@@ -96,8 +99,12 @@  static int read_symbol(FILE *in, struct
 			/* nothing */;
 		rc = fscanf(in, "%499[^ |] |%llx | %c |",
 			    str, &s->addr, &stype);
-		if (rc == 3 && fscanf(in, " %19[^ |] |", type) != 1)
-			*type = '\0';
+		if (rc == 3) {
+			if(fscanf(in, " %19[^ |] |", type) != 1)
+				*type = '\0';
+			else if(fscanf(in, "%lx |", &s->size) != 1)
+				s->size = 0;
+		}
 		break;
 	}
 	if (rc != 3) {
@@ -287,9 +294,18 @@  static int compare_name_orig(const void
 	return rc;
 }
 
+/* Determine whether the symbol at address table @idx wants a fake END
+ * symbol (address only) emitted as well. */
+static bool want_symbol_end(unsigned int idx)
+{
+	return table[idx].size &&
+	       (idx + 1 == table_cnt ||
+	        table[idx].addr + table[idx].size < table[idx + 1].addr);
+}
+
 static void write_src(void)
 {
-	unsigned int i, k, off;
+	unsigned int i, k, off, ends;
 	unsigned int best_idx[256];
 	unsigned int *markers;
 	char buf[KSYM_NAME_LEN+1];
@@ -318,24 +334,42 @@  static void write_src(void)
 	printf("#else\n");
 	output_label("symbols_offsets");
 	printf("#endif\n");
-	for (i = 0; i < table_cnt; i++) {
+	for (i = 0, ends = 0; i < table_cnt; i++) {
 		printf("\tPTR\t%#llx - SYMBOLS_ORIGIN\n", table[i].addr);
+
+		table[i].addr_idx = i + ends;
+
+		if (!want_symbol_end(i)) {
+			/* If there's another symbol at the same address,
+			 * propagate this symbol's size if the next one has
+			 * no size, or if the next one's size is larger. */
+			if (table[i].size &&
+			    i + 1 < table_cnt &&
+			    table[i + 1].addr == table[i].addr &&
+			    (!table[i + 1].size ||
+			     table[i + 1].size > table[i].size))
+				table[i + 1].size = table[i].size;
+			continue;
+		}
+
+		++ends;
+		printf("\tPTR\t%#llx - SYMBOLS_ORIGIN\n",
+		       table[i].addr + table[i].size);
 	}
 	printf("\n");
 
 	output_label("symbols_num_addrs");
-	printf("\t.long\t%d\n", table_cnt);
+	printf("\t.long\t%d\n", table_cnt + ends);
 	printf("\n");
 
 	/* table of offset markers, that give the offset in the compressed stream
 	 * every 256 symbols */
-	markers = (unsigned int *) malloc(sizeof(unsigned int) * ((table_cnt + 255) / 256));
+	markers = malloc(sizeof(*markers) * ((table_cnt + ends + 255) >> 8));
 
 	output_label("symbols_names");
-	off = 0;
-	for (i = 0; i < table_cnt; i++) {
-		if ((i & 0xFF) == 0)
-			markers[i >> 8] = off;
+	for (i = 0, off = 0, ends = 0; i < table_cnt; i++) {
+		if (((i + ends) & 0xFF) == 0)
+			markers[(i + ends) >> 8] = off;
 
 		printf("\t.byte 0x%02x", table[i].len);
 		for (k = 0; k < table[i].len; k++)
@@ -344,11 +378,22 @@  static void write_src(void)
 
 		table[i].stream_offset = off;
 		off += table[i].len + 1;
+
+		if (!want_symbol_end(i))
+			continue;
+
+		/* END symbols have no name or type. */
+		++ends;
+		if (((i + ends) & 0xFF) == 0)
+			markers[(i + ends) >> 8] = off;
+
+		printf("\t.byte 0\n");
+		++off;
 	}
 	printf("\n");
 
 	output_label("symbols_markers");
-	for (i = 0; i < ((table_cnt + 255) >> 8); i++)
+	for (i = 0; i < ((table_cnt + ends + 255) >> 8); i++)
 		printf("\t.long\t%d\n", markers[i]);
 	printf("\n");
 
@@ -450,7 +495,6 @@  static void compress_symbols(unsigned ch
 		len = table[i].len;
 		p1 = table[i].sym;
 
-		table[i].addr_idx = i;
 		/* find the token on the symbol */
 		p2 = memmem_pvt(p1, len, str, 2);
 		if (!p2) continue;