diff mbox series

[v10,08/13] kbuild: make address ranges map work with IBT

Message ID 20221205163157.269335-9-nick.alcock@oracle.com (mailing list archive)
State Deferred
Headers show
Series kallsyms: reliable symbol->address lookup with /proc/kallmodsyms | expand

Commit Message

Nick Alcock Dec. 5, 2022, 4:31 p.m. UTC
The previous commit emits .tmp_vmlinux.ranges, which maps address
range/size pairs in vmlinux to the object files which make them up; this
is used by kallmodsyms to let us associate symbols with object file
names and kernel module names at address-range granularity (i.e.,
space-efficiently).

But the previous commit only works if the final linker runs directly on
the input .o files from the kernel build: if an intermediate ld -r is
done, the final link emits a mapfile containing only the name of the
intermediate ld -r output (vmlinux.o), which messes up
.tmp_vmlinux.ranges and makes kallmodsyms attribute all symbols to
{vmlinux.o}, which isn't much use for disambiguation. vmlinux.o has the
object file names we need, but the addresses are all wrong so we can't
use that either.

This commit fixes that by extracting the addresses from the final
vmlinux mapfile (or one of the intermediate kallsyms mapfiles) and using
them to adjust the address/objfile name pairs extracted from the
vmlinux.o mapfile, giving us a ranges map with the right addresses and
the right names.  It's a bit painful because the addresses are often
large numbers, and awk is... not good at handling them (GNU awk can use
large integers, but the feature is at risk of removal).  So we emit a
slightly different file format with a pair of hex values (the address in
vmlinux.o and section address from the final mapfile), then pipe it
through a tiny new C filter (scripts/addaddrs) whose only purpose is to
add those two numbers together!

One niggle is that .hot/.cold stuff is no longer correctly attributed to
its object file -- it seems not to appear in any of the mapfiles at all
(only in System.map), so I don't see any way to fix this.

(This possibly makes it work with clang LTO too, but I haven't tested
it, and if the mapfile format differs it will fail until that's fixed.
But this definitely overcomes *one* of the roadblocks preventing clang
LTO from working with kallmodsyms.)

Signed-off-by: Nick Alcock <nick.alcock@oracle.com>
Reviewed-by: Kris Van Hees <kris.van.hees@oracle.com>
---

Notes:
    v10: new.

 Documentation/dontdiff     |  1 +
 init/Kconfig               | 10 ++++++++++
 scripts/.gitignore         |  1 +
 scripts/Makefile           |  1 +
 scripts/Makefile.vmlinux_o |  6 +++++-
 scripts/addaddrs.c         | 28 ++++++++++++++++++++++++++++
 scripts/link-vmlinux.sh    | 34 +++++++++++++++++++++++++---------
 7 files changed, 71 insertions(+), 10 deletions(-)
 create mode 100644 scripts/addaddrs.c
diff mbox series

Patch

diff --git a/Documentation/dontdiff b/Documentation/dontdiff
index 17686f59039c..72c089eea111 100644
--- a/Documentation/dontdiff
+++ b/Documentation/dontdiff
@@ -79,6 +79,7 @@  SCCS
 System.map*
 TAGS
 aconf
+addaddrs
 af_names.h
 aic7*reg.h*
 aic7*reg_print.c*
diff --git a/init/Kconfig b/init/Kconfig
index c45935cd2f1f..160ec1370594 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1570,6 +1570,16 @@  config POSIX_TIMERS
 
 	  If unsure say y.
 
+config KALLMODSYMS
+	default y
+	bool "Enable support for /proc/kallmodsyms" if EXPERT
+	depends on KALLSYMS
+	select VMLINUX_MAP
+	help
+	  This option enables the /proc/kallmodsyms file, which unambiguously
+	  maps built-in kernel symbols and their associated object files and
+	  modules to addresses.
+
 config PRINTK
 	default y
 	bool "Enable support for printk" if EXPERT
diff --git a/scripts/.gitignore b/scripts/.gitignore
index b7aec8eb1bd4..d042f0e3d13f 100644
--- a/scripts/.gitignore
+++ b/scripts/.gitignore
@@ -1,4 +1,5 @@ 
 # SPDX-License-Identifier: GPL-2.0-only
+/addaddrs
 /asn1_compiler
 /bin2c
 /generate_rust_target
diff --git a/scripts/Makefile b/scripts/Makefile
index 1575af84d557..356cafcd313d 100644
--- a/scripts/Makefile
+++ b/scripts/Makefile
@@ -5,6 +5,7 @@ 
 
 hostprogs-always-$(CONFIG_BUILD_BIN2C)			+= bin2c
 hostprogs-always-$(CONFIG_KALLSYMS)			+= kallsyms
+hostprogs-always-$(CONFIG_KALLMODSYMS)			+= addaddrs
 hostprogs-always-$(BUILD_C_RECORDMCOUNT)		+= recordmcount
 hostprogs-always-$(CONFIG_BUILDTIME_TABLE_SORT)		+= sorttable
 hostprogs-always-$(CONFIG_ASN1)				+= asn1_compiler
diff --git a/scripts/Makefile.vmlinux_o b/scripts/Makefile.vmlinux_o
index 9b4ca83f0695..94c2ec366b91 100644
--- a/scripts/Makefile.vmlinux_o
+++ b/scripts/Makefile.vmlinux_o
@@ -27,6 +27,10 @@  ifdef CONFIG_LTO_CLANG
 initcalls-lds := .tmp_initcalls.lds
 endif
 
+ifneq ($(CONFIG_VMLINUX_MAP)$(CONFIG_KALLMODSYMS),)
+KBUILD_MAPFLAGS = -Map=$@.map
+endif
+
 # objtool for vmlinux.o
 # ---------------------------------------------------------------------------
 #
@@ -47,7 +51,7 @@  objtool-args = $(vmlinux-objtool-args-y) --link
 quiet_cmd_ld_vmlinux.o = LD      $@
       cmd_ld_vmlinux.o = \
 	$(LD) ${KBUILD_LDFLAGS} -r -o $@ \
-	$(addprefix -T , $(initcalls-lds)) \
+	$(KBUILD_MAPFLAGS) $(addprefix -T , $(initcalls-lds)) \
 	--whole-archive vmlinux.a --no-whole-archive \
 	--start-group $(KBUILD_VMLINUX_LIBS) --end-group \
 	$(cmd_objtool)
diff --git a/scripts/addaddrs.c b/scripts/addaddrs.c
new file mode 100644
index 000000000000..d15cbbf8d262
--- /dev/null
+++ b/scripts/addaddrs.c
@@ -0,0 +1,28 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/* Used only by link-vmlinux.sh */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <inttypes.h>
+
+int main (void)	/* stdin filter: sums the first two hex fields of each line */
+{
+	int i = 0;	/* input line number, reported in the error message */
+	while (!feof(stdin)) {	/* keep reading until EOF is flagged on stdin */
+		uint64_t a, b;	/* the two hex values that get added together */
+		int ret;	/* number of fields scanf converted */
+		char *rest = NULL;	/* remainder of the line; filled in by %m, freed below */
+
+		i++;	/* count the line before parsing so a failure names it */
+		if ((ret = scanf("%" SCNx64 " %" SCNx64 " %m[^\n]\n", &a, &b, &rest)) < 3) {
+			fprintf(stderr,
+				"Syntax error: invalid line %i found in rangefile generation: at least three fields expected, %i converted\n", i, ret);
+			exit(1);	/* a single malformed line aborts the whole run */
+		}
+
+		printf("0x%018" PRIx64 " %s\n", a+b, rest);	/* zero-padded hex sum, then the rest of the line unchanged */
+		free(rest);	/* release the buffer obtained via %m for this line */
+	}
+	exit(0);
+}
diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh
index a40d372b1289..3b3ea6214062 100755
--- a/scripts/link-vmlinux.sh
+++ b/scripts/link-vmlinux.sh
@@ -60,10 +60,7 @@  vmlinux_link()
 	# skip output file argument
 	shift
 
-	# kallmodsyms needs a linker mapfile that contains original object
-	# file names, so cannot use this optimization.
-	if { is_enabled CONFIG_LTO_CLANG || is_enabled CONFIG_X86_KERNEL_IBT; } && \
-	   ! is_enabled CONFIG_KALLMODSYMS; then
+	if is_enabled CONFIG_LTO_CLANG || is_enabled CONFIG_X86_KERNEL_IBT; then
 		# Use vmlinux.o instead of performing the slow LTO link again.
 		objs=vmlinux.o
 		libs=
@@ -153,12 +150,31 @@  kallsyms()
 	#       - but sometimes there is a line break after the first field
 	#   - start reading at "Linker script and memory map"
 	#   - stop reading at ".brk"
+	# if there is a vmlinux.o.map and LTO_CLANG or KERNEL_IBT are
+	# turned on, we have used a vmlinux -r'ed .o for linking: use this
+	# as our primary information source, but acquire section addresses
+	# from the (later) linker map we were passed in.  This makes things
+	# a bit more complex, since we have to recognize and eliminate
+	# sections elided by the linker, and add together numbers larger
+	# than awk can portably handle.
 	if is_enabled CONFIG_KALLMODSYMS; then
-		${AWK} '
-		    /\.o$/ && start==1 { print $(NF-2), $(NF-1), $NF }
-		    /^Linker script and memory map/ { start = 1 }
-		    /^\.brk/ { exit(0) }
-		' ${3} | sort > .tmp_vmlinux.ranges
+		if is_enabled CONFIG_LTO_CLANG || is_enabled CONFIG_X86_KERNEL_IBT; then
+			${AWK} 'BEGIN { addresses = 1 }
+			    /^Linker script and memory map/ { start = 1 }
+			    !start { next }
+			    { got_section = 0 }
+			    /^ \./ { section = $1; got_section = 1; if (NF == 1) { getline }}
+			    addresses && got_section && !(section in addrs) { addrs[section] = $2 }
+			    !addresses && got_section && section in addrs { print $(NF-2), addrs[section], $(NF-1), $NF }
+			    /^\.brk/ || /^\.bss\.\.brk/ { addresses = 0; start = 0; nextfile }
+			' ${3} vmlinux.o.map | scripts/addaddrs | sort > .tmp_vmlinux.ranges
+		else
+			${AWK} '
+			    start && /\.o$/ { print $(NF-2), $(NF-1), $NF }
+			    /^Linker script and memory map/ { start = 1 }
+			    /^\.brk/ { exit(0) }
+			' ${3} | sort > .tmp_vmlinux.ranges
+		fi
 	fi
 
 	# get kallsyms options