@@ -22,6 +22,8 @@ Kernel Build System
gcc-plugins
llvm
+ lto-build
+
.. only:: subproject and html
Indices
new file mode 100644
@@ -0,0 +1,76 @@
+=====================================================
+gcc link time optimization (LTO) for the Linux kernel
+=====================================================
+
+Link Time Optimization allows the compiler to optimize the complete program
+instead of just each file.
+
+The compiler can inline functions between files and do various other global
+optimizations, like specializing functions for common parameters,
+determining when global variables are clobbered, making functions pure/const,
+propagating constants globally, removing unneeded data and others.
+
+It will also drop unused functions which can make the kernel
+image smaller in some circumstances, in particular for small kernel
+configurations.
+
+For small monolithic kernels it can throw away unused code very effectively
+(especially when modules are disabled) and usually shrinks
+the code size.
+
+Build time and memory consumption at build time will increase, depending
+on the size of the largest binary. Modular kernels are less affected.
+With LTO, incremental builds are less incremental, as the whole binary
+always needs to be re-optimized (but not re-parsed).
+
+Oopses can be somewhat more difficult to read, due to the more aggressive
+inlining: it helps to use scripts/faddr2line.
+
+It is currently incompatible with live patching.
+
+Normal "reasonable" builds work with less than 4GB of RAM, but very large
+configurations like allyesconfig typically need more memory. The actual
+memory needed depends on the available memory (gcc sizes its garbage
+collector pools based on that or on the ulimit -m limits) and
+the compiler version.
+
+Requirements:
+-------------
+
+- Enough memory: 4GB for a standard build, more for allyesconfig.
+  The peak memory usage happens single threaded (when lto-wpa merges types),
+  so dialing back -j options will not help much.
+
+A 32bit hosted compiler is unlikely to work due to the memory requirements.
+You can however build a kernel targeted at 32bit on a 64bit host.
+
+FAQs:
+-----
+
+* I get a section type attribute conflict
+
+ Usually because of someone doing const __initdata (should be
+ const __initconst) or const __read_mostly (should be just const). Check
+ both symbols reported by gcc.
+
+References:
+-----------
+
+* Presentation on Kernel LTO
+ (note, performance numbers/details totally outdated.)
+
+ http://halobates.de/kernel-lto.pdf
+
+* Generic gcc LTO:
+
+ * http://www.ucw.cz/~hubicka/slides/labs2013.pdf
+ * http://www.hipeac.net/system/files/barcelona.pdf
+
+* Somewhat outdated too (from GCC site):
+
+ * http://gcc.gnu.org/projects/lto/lto.pdf
+ * http://gcc.gnu.org/projects/lto/whopr.pdf
+
+Happy Link-Time-Optimizing!
+
+Andi Kleen
@@ -482,6 +482,7 @@ KBUILD_HOSTLDLIBS := $(HOST_LFS_LIBS) $(HOSTLDLIBS)
# Make variables (CC, etc...)
CPP = $(CC) -E
+LDFINAL = $(LD)
ifneq ($(LLVM),)
CC = $(LLVM_PREFIX)clang$(LLVM_SUFFIX)
LD = $(LLVM_PREFIX)ld.lld$(LLVM_SUFFIX)
@@ -604,7 +605,7 @@ export RUSTC RUSTDOC RUSTFMT RUSTC_OR_CLIPPY_QUIET RUSTC_OR_CLIPPY BINDGEN CARGO
export HOSTRUSTC KBUILD_HOSTRUSTFLAGS
export CPP AR NM STRIP OBJCOPY OBJDUMP READELF PAHOLE RESOLVE_BTFIDS LEX YACC AWK INSTALLKERNEL
export PERL PYTHON3 CHECK CHECKFLAGS MAKE UTS_MACHINE HOSTCXX
-export KGZIP KBZIP2 KLZOP LZMA LZ4 XZ ZSTD
+export KGZIP KBZIP2 KLZOP LZMA LZ4 XZ ZSTD LDFINAL
export KBUILD_HOSTCXXFLAGS KBUILD_HOSTLDFLAGS KBUILD_HOSTLDLIBS LDFLAGS_MODULE
export KBUILD_USERCFLAGS KBUILD_USERLDFLAGS
@@ -1085,6 +1086,7 @@ include-$(CONFIG_KMSAN) += scripts/Makefile.kmsan
include-$(CONFIG_UBSAN) += scripts/Makefile.ubsan
include-$(CONFIG_KCOV) += scripts/Makefile.kcov
include-$(CONFIG_RANDSTRUCT) += scripts/Makefile.randstruct
+include-$(CONFIG_LTO_GCC) += scripts/Makefile.lto
include-$(CONFIG_GCC_PLUGINS) += scripts/Makefile.gcc-plugins
include $(addprefix $(srctree)/, $(include-y))
@@ -689,6 +689,21 @@ config HAS_LTO_CLANG
The compiler and Kconfig options support building with Clang's
LTO.
+config ARCH_SUPPORTS_LTO_GCC
+ bool
+
+# Some ar versions leak file descriptors while the LTO plugin is
+# in use, which leads to strange errors if ulimit -n is too low.
+# The threshold is arbitrary, but should be enough for most
+# kernel configs. The leak was a regression present only in some
+# transient binutils version, so either an older or a new enough
+# binutils is ok.
+# The version tested here might not be the exact range with this bug.
+config BAD_AR
+	def_bool y
+	depends on $(shell,ulimit -n) < 4000
+	depends on LD_VERSION = 23000
+
choice
prompt "Link Time Optimization (LTO)"
default LTO_NONE
@@ -736,8 +751,45 @@ config LTO_CLANG_THIN
https://clang.llvm.org/docs/ThinLTO.html
If unsure, say Y.
+
+config LTO_GCC
+	bool "gcc LTO"
+	depends on ARCH_SUPPORTS_LTO_GCC && CC_IS_GCC
+	depends on GCC_VERSION >= 100300
+	depends on LD_VERSION >= 22700
+	depends on !BAD_AR
+	select LTO
+	help
+	  Enable whole program (link time) optimizations (LTO) for the whole
+	  kernel and each module. This usually increases compile time,
+	  especially for incremental builds, but tends to generate better code
+	  and enables some global checks.
+
+	  It allows the compiler to inline functions between different files
+	  and do other global optimizations, like propagating constants between
+	  functions, determining side effects of functions, avoiding unnecessary
+	  register saving around functions, or optimizing unused function
+	  arguments. It also allows the compiler to drop unused functions.
+
+	  With this option the compiler will also do some global checking over
+	  different source files.
+
+	  This requires a gcc 10.3 or later compiler and binutils >= 2.27.
+
+	  On larger non modular configurations this may need more than 4GB of
+	  RAM for the link phase, as well as a 64bit host compiler.
+
+	  For more information see Documentation/kbuild/lto-build.rst
endchoice
+config LTO_CP_CLONE
+	bool "Allow aggressive cloning for function specialization"
+	depends on LTO_GCC
+	help
+	  Allow the compiler to clone and specialize functions for specific
+	  arguments when it determines these arguments are commonly
+	  used. Experimental. Will increase text size.
+
config ARCH_SUPPORTS_CFI_CLANG
bool
help
@@ -154,7 +154,7 @@ is-single-obj-m = $(and $(part-of-module),$(filter $@, $(obj-m)),y)
# When a module consists of a single object, there is no reason to keep LLVM IR.
# Make $(LD) covert LLVM IR to ELF here.
ifdef CONFIG_LTO
-cmd_ld_single_m = $(if $(is-single-obj-m), ; $(LD) $(ld_flags) -r -o $(tmp-target) $@; mv $(tmp-target) $@)
+cmd_ld_single_m = $(if $(is-single-obj-m), ; $(LDFINAL) $(ld_flags) -r -o $(tmp-target) $@; mv $(tmp-target) $@)
endif
quiet_cmd_cc_o_c = CC $(quiet_modtag) $@
@@ -265,7 +265,8 @@ $(obj)/%.usyms: $(obj)/%.o FORCE
$(call if_changed,undefined_syms)
quiet_cmd_cc_lst_c = MKLST $@
- cmd_cc_lst_c = $(CC) $(c_flags) -g -c -o $*.o $< && \
+ cmd_cc_lst_c = $(if $(CONFIG_LTO),$(warning Listing in LTO mode does not match final binary)) \
+ $(CC) $(c_flags) -g -c -o $*.o $< && \
$(CONFIG_SHELL) $(srctree)/scripts/makelst $*.o \
System.map $(OBJDUMP) > $@
@@ -446,8 +447,8 @@ $(obj)/modules.order: $(obj-m) FORCE
$(obj)/lib.a: $(lib-y) FORCE
$(call if_changed,ar)
-quiet_cmd_ld_multi_m = LD [M] $@
- cmd_ld_multi_m = $(LD) $(ld_flags) -r -o $@ @$(patsubst %.o,%.mod,$@) $(cmd_objtool)
+quiet_cmd_ld_multi_m = LDFINAL [M] $@
+ cmd_ld_multi_m = $(LDFINAL) $(ld_flags) -r -o $@ @$(patsubst %.o,%.mod,$@) $(cmd_objtool)
define rule_ld_multi_m
$(call cmd_and_savecmd,ld_multi_m)
new file mode 100644
@@ -0,0 +1,43 @@
+#
+# Build settings for gcc link time optimization (LTO)
+#
+
+# Exported unconditionally: empty here, -fno-lto once gcc LTO is enabled.
+export DISABLE_LTO_GCC :=
+
+ifdef CONFIG_LTO_GCC
+  # Compile with -flto; DISABLE_LTO_GCC carries the flag that turns it
+  # back off (presumably for objects that must not be built with LTO).
+  DISABLE_LTO_GCC  := -fno-lto
+  CC_FLAGS_LTO_GCC := -flto
+  KBUILD_CFLAGS    += $(CC_FLAGS_LTO_GCC)
+
+  # Exported so that sub-makes see the LTO compile flag as well.
+  export CC_FLAGS_LTO := -flto
+
+  # Options for the gcc invocation that performs the final link (see
+  # LDFINAL below); -fipa-cp-clone is only added for CONFIG_LTO_CP_CLONE=y.
+  lto-flags-y := -flinker-output=nolto-rel -flto=jobserver -fwhole-program
+  lto-flags-$(CONFIG_LTO_CP_CLONE) += -fipa-cp-clone
+
+  # LTO_EXTRA_CFLAGS lets the user pass extra flags on the command line.
+  lto-flags-y += $(LTO_EXTRA_CFLAGS)
+
+  # LTO links have to be driven by gcc instead of calling ld directly.
+  # scripts/gcc-ld is a wrapper that converts the ld command line into
+  # a gcc one.
+  LDFINAL := $(CONFIG_SHELL) $(srctree)/scripts/gcc-ld $(lto-flags-y)
+
+  # gcc writes a lot of LTO temporaries to TMPDIR; when /tmp is a tmpfs
+  # that can easily drive the machine OOM, so default to the object
+  # directory instead.
+  # The drawback is that junk left behind by an interrupted compilation
+  # is more visible there, but the same junk would otherwise be left
+  # in /tmp anyway.
+  TMPDIR ?= $(objtree)
+  export TMPDIR
+
+  # Switch to the plugin aware gcc wrappers for ar and nm.
+  AR = $(CROSS_COMPILE)gcc-ar
+  NM = $(CROSS_COMPILE)gcc-nm
+endif # CONFIG_LTO_GCC
@@ -32,7 +32,7 @@ ARCH_POSTLINK := $(wildcard $(srctree)/arch/$(SRCARCH)/Makefile.postlink)
quiet_cmd_ld_ko_o = LD [M] $@
cmd_ld_ko_o += \
- $(LD) -r $(KBUILD_LDFLAGS) \
+ $(LDFINAL) -r $(KBUILD_LDFLAGS) \
$(KBUILD_LDFLAGS_MODULE) $(LDFLAGS_MODULE) \
-T scripts/module.lds -o $@ $(filter %.o, $^); \
$(if $(ARCH_POSTLINK), $(MAKE) -f $(ARCH_POSTLINK) $@, true)
@@ -26,7 +26,8 @@ ARCH_POSTLINK := $(wildcard $(srctree)/arch/$(SRCARCH)/Makefile.postlink)
# Final link of vmlinux with optional arch pass after final link
cmd_link_vmlinux = \
- $< "$(LD)" "$(KBUILD_LDFLAGS)" "$(LDFLAGS_vmlinux)"; \
+ $< "$(LD)" "$(LDFINAL)" "$(KBUILD_LDFLAGS)" \
+ "$(LDFLAGS_vmlinux)"; \
$(if $(ARCH_POSTLINK), $(MAKE) -f $(ARCH_POSTLINK) $@, true)
targets += vmlinux
@@ -44,9 +44,9 @@ objtool-args = $(vmlinux-objtool-args-y) --link
# Link of vmlinux.o used for section mismatch analysis
# ---------------------------------------------------------------------------
-quiet_cmd_ld_vmlinux.o = LD $@
+quiet_cmd_ld_vmlinux.o = LDFINAL $@
cmd_ld_vmlinux.o = \
- $(LD) ${KBUILD_LDFLAGS} -r -o $@ \
+ $(LDFINAL) ${KBUILD_LDFLAGS} -r -o $@ \
$(addprefix -T , $(initcalls-lds)) \
--whole-archive vmlinux.a --no-whole-archive \
--start-group $(KBUILD_VMLINUX_LIBS) --end-group \
@@ -29,8 +29,9 @@
set -e
LD="$1"
-KBUILD_LDFLAGS="$2"
-LDFLAGS_vmlinux="$3"
+LDFINAL="$2"
+KBUILD_LDFLAGS="$3"
+LDFLAGS_vmlinux="$4"
is_enabled() {
grep -q "^$1=y" include/config/auto.conf
@@ -82,7 +83,7 @@ vmlinux_link()
ldlibs="-lutil -lrt -lpthread"
else
wl=
- ld="${LD}"
+ ld="${LDFINAL}"
ldflags="${KBUILD_LDFLAGS} ${LDFLAGS_vmlinux}"
ldlibs=
fi