From 3b15cdc15956673ba1551d79bceae471436ac6a9 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Fri, 11 Dec 2020 10:46:18 -0800 Subject: tracing: move function tracer options to Kconfig Move function tracer options to Kconfig to make it easier to add new methods for generating __mcount_loc, and to make the options available also when building kernel modules. Note that FTRACE_MCOUNT_USE_* options are updated on rebuild and therefore, work even if the .config was generated in a different environment. Signed-off-by: Sami Tolvanen Acked-by: Steven Rostedt (VMware) Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20201211184633.3213045-2-samitolvanen@google.com --- scripts/Makefile.build | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'scripts') diff --git a/scripts/Makefile.build b/scripts/Makefile.build index 4c058f12dd73..22654a463ad8 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -178,8 +178,7 @@ cmd_modversions_c = \ fi endif -ifdef CONFIG_FTRACE_MCOUNT_RECORD -ifndef CC_USING_RECORD_MCOUNT +ifdef CONFIG_FTRACE_MCOUNT_USE_RECORDMCOUNT # compiler will not generate __mcount_loc use recordmcount or recordmcount.pl ifdef BUILD_C_RECORDMCOUNT ifeq ("$(origin RECORDMCOUNT_WARN)", "command line") @@ -206,8 +205,7 @@ recordmcount_source := $(srctree)/scripts/recordmcount.pl endif # BUILD_C_RECORDMCOUNT cmd_record_mcount = $(if $(findstring $(strip $(CC_FLAGS_FTRACE)),$(_c_flags)), \ $(sub_cmd_record_mcount)) -endif # CC_USING_RECORD_MCOUNT -endif # CONFIG_FTRACE_MCOUNT_RECORD +endif # CONFIG_FTRACE_MCOUNT_USE_RECORDMCOUNT ifdef CONFIG_STACK_VALIDATION ifneq ($(SKIP_STACK_VALIDATION),1) -- cgit v1.2.3 From dc5723b02e523b2c4a68667f7e28c65018f7202f Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Fri, 11 Dec 2020 10:46:19 -0800 Subject: kbuild: add support for Clang LTO This change adds build system support for Clang's Link Time Optimization (LTO). With -flto, instead of ELF object files, Clang produces LLVM bitcode, which is compiled into native code at link time, allowing the final binary to be optimized globally. For more details, see: https://llvm.org/docs/LinkTimeOptimization.html The Kconfig option CONFIG_LTO_CLANG is implemented as a choice, which defaults to LTO being disabled. To use LTO, the architecture must select ARCH_SUPPORTS_LTO_CLANG and support: - compiling with Clang, - compiling all assembly code with Clang's integrated assembler, - and linking with LLD. While using CONFIG_LTO_CLANG_FULL results in the best runtime performance, the compilation is not scalable in time or memory. CONFIG_LTO_CLANG_THIN enables ThinLTO, which allows parallel optimization and faster incremental builds. ThinLTO is used by default if the architecture also selects ARCH_SUPPORTS_LTO_CLANG_THIN: https://clang.llvm.org/docs/ThinLTO.html To enable LTO, LLVM tools must be used to handle bitcode files, by passing LLVM=1 and LLVM_IAS=1 options to make: $ make LLVM=1 LLVM_IAS=1 defconfig $ scripts/config -e LTO_CLANG_THIN $ make LLVM=1 LLVM_IAS=1 To prepare for LTO support with other compilers, common parts are gated behind the CONFIG_LTO option, and LTO can be disabled for specific files by filtering out CC_FLAGS_LTO. Signed-off-by: Sami Tolvanen Reviewed-by: Kees Cook Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20201211184633.3213045-3-samitolvanen@google.com --- Makefile | 19 +++++++- arch/Kconfig | 91 +++++++++++++++++++++++++++++++++++++++ include/asm-generic/vmlinux.lds.h | 11 +++-- scripts/Makefile.build | 9 +++- scripts/Makefile.modfinal | 9 +++- scripts/Makefile.modpost | 21 ++++++++- scripts/link-vmlinux.sh | 32 ++++++++++---- 7 files changed, 174 insertions(+), 18 deletions(-) (limited to 'scripts') diff --git a/Makefile b/Makefile index b434c342080b..2b9bcecd6ea1 100644 --- a/Makefile +++ b/Makefile @@ -893,6 +893,21 @@ KBUILD_CFLAGS += $(CC_FLAGS_SCS) export CC_FLAGS_SCS endif +ifdef CONFIG_LTO_CLANG +ifdef CONFIG_LTO_CLANG_THIN +CC_FLAGS_LTO += -flto=thin -fsplit-lto-unit +KBUILD_LDFLAGS += --thinlto-cache-dir=$(extmod-prefix).thinlto-cache +else +CC_FLAGS_LTO += -flto +endif +CC_FLAGS_LTO += -fvisibility=hidden +endif + +ifdef CONFIG_LTO +KBUILD_CFLAGS += $(CC_FLAGS_LTO) +export CC_FLAGS_LTO +endif + ifdef CONFIG_DEBUG_FORCE_FUNCTION_ALIGN_32B KBUILD_CFLAGS += -falign-functions=32 endif @@ -1479,7 +1494,7 @@ MRPROPER_FILES += include/config include/generated \ *.spec # Directories & files removed with 'make distclean' -DISTCLEAN_FILES += tags TAGS cscope* GPATH GTAGS GRTAGS GSYMS +DISTCLEAN_FILES += tags TAGS cscope* GPATH GTAGS GRTAGS GSYMS .thinlto-cache # clean - Delete most, but leave enough to build external modules # @@ -1725,7 +1740,7 @@ PHONY += compile_commands.json clean-dirs := $(KBUILD_EXTMOD) clean: rm-files := $(KBUILD_EXTMOD)/Module.symvers $(KBUILD_EXTMOD)/modules.nsdeps \ - $(KBUILD_EXTMOD)/compile_commands.json + $(KBUILD_EXTMOD)/compile_commands.json $(KBUILD_EXTMOD)/.thinlto-cache PHONY += help help: diff --git a/arch/Kconfig b/arch/Kconfig index 78c6f05b10f9..9494e3931f4b 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -631,6 +631,97 @@ config SHADOW_CALL_STACK reading and writing arbitrary memory may be able to locate them and hijack control flow by modifying the stacks. +config LTO + bool + help + Selected if the kernel will be built using the compiler's LTO feature. + +config LTO_CLANG + bool + select LTO + help + Selected if the kernel will be built using Clang's LTO feature. + +config ARCH_SUPPORTS_LTO_CLANG + bool + help + An architecture should select this option if it supports: + - compiling with Clang, + - compiling inline assembly with Clang's integrated assembler, + - and linking with LLD. + +config ARCH_SUPPORTS_LTO_CLANG_THIN + bool + help + An architecture should select this option if it can support Clang's + ThinLTO mode. + +config HAS_LTO_CLANG + def_bool y + # Clang >= 11: https://github.com/ClangBuiltLinux/linux/issues/510 + depends on CC_IS_CLANG && CLANG_VERSION >= 110000 && LD_IS_LLD + depends on $(success,test $(LLVM) -eq 1) + depends on $(success,test $(LLVM_IAS) -eq 1) + depends on $(success,$(NM) --help | head -n 1 | grep -qi llvm) + depends on $(success,$(AR) --help | head -n 1 | grep -qi llvm) + depends on ARCH_SUPPORTS_LTO_CLANG + depends on !FTRACE_MCOUNT_USE_RECORDMCOUNT + depends on !KASAN + depends on !GCOV_KERNEL + depends on !MODVERSIONS + help + The compiler and Kconfig options support building with Clang's + LTO. + +choice + prompt "Link Time Optimization (LTO)" + default LTO_NONE + help + This option enables Link Time Optimization (LTO), which allows the + compiler to optimize binaries globally. + + If unsure, select LTO_NONE. Note that LTO is very resource-intensive + so it's disabled by default. + +config LTO_NONE + bool "None" + help + Build the kernel normally, without Link Time Optimization (LTO). + +config LTO_CLANG_FULL + bool "Clang Full LTO (EXPERIMENTAL)" + depends on HAS_LTO_CLANG + depends on !COMPILE_TEST + select LTO_CLANG + help + This option enables Clang's full Link Time Optimization (LTO), which + allows the compiler to optimize the kernel globally. If you enable + this option, the compiler generates LLVM bitcode instead of ELF + object files, and the actual compilation from bitcode happens at + the LTO link step, which may take several minutes depending on the + kernel configuration. More information can be found from LLVM's + documentation: + + https://llvm.org/docs/LinkTimeOptimization.html + + During link time, this option can use a large amount of RAM, and + may take much longer than the ThinLTO option. + +config LTO_CLANG_THIN + bool "Clang ThinLTO (EXPERIMENTAL)" + depends on HAS_LTO_CLANG && ARCH_SUPPORTS_LTO_CLANG_THIN + select LTO_CLANG + help + This option enables Clang's ThinLTO, which allows for parallel + optimization and faster incremental compiles compared to the + CONFIG_LTO_CLANG_FULL option. More information can be found + from Clang's documentation: + + https://clang.llvm.org/docs/ThinLTO.html + + If unsure, say Y. +endchoice + config HAVE_ARCH_WITHIN_STACK_FRAMES bool help diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index b2b3d81b1535..8988a2e445d8 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -90,15 +90,18 @@ * .data. We don't want to pull in .data..other sections, which Linux * has defined. Same for text and bss. * + * With LTO_CLANG, the linker also splits sections by default, so we need + * these macros to combine the sections during the final link. + * * RODATA_MAIN is not used because existing code already defines .rodata.x * sections to be brought in with rodata. */ -#ifdef CONFIG_LD_DEAD_CODE_DATA_ELIMINATION +#if defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || defined(CONFIG_LTO_CLANG) #define TEXT_MAIN .text .text.[0-9a-zA-Z_]* -#define DATA_MAIN .data .data.[0-9a-zA-Z_]* .data..LPBX* +#define DATA_MAIN .data .data.[0-9a-zA-Z_]* .data..L* .data..compoundliteral* #define SDATA_MAIN .sdata .sdata.[0-9a-zA-Z_]* -#define RODATA_MAIN .rodata .rodata.[0-9a-zA-Z_]* -#define BSS_MAIN .bss .bss.[0-9a-zA-Z_]* +#define RODATA_MAIN .rodata .rodata.[0-9a-zA-Z_]* .rodata..L* +#define BSS_MAIN .bss .bss.[0-9a-zA-Z_]* .bss..compoundliteral* #define SBSS_MAIN .sbss .sbss.[0-9a-zA-Z_]* #else #define TEXT_MAIN .text diff --git a/scripts/Makefile.build b/scripts/Makefile.build index 22654a463ad8..65d4bea937fa 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -111,7 +111,7 @@ endif # --------------------------------------------------------------------------- quiet_cmd_cc_s_c = CC $(quiet_modtag) $@ - cmd_cc_s_c = $(CC) $(filter-out $(DEBUG_CFLAGS), $(c_flags)) -fverbose-asm -S -o $@ $< + cmd_cc_s_c = $(CC) $(filter-out $(DEBUG_CFLAGS) $(CC_FLAGS_LTO), $(c_flags)) -fverbose-asm -S -o $@ $< $(obj)/%.s: $(src)/%.c FORCE $(call if_changed_dep,cc_s_c) @@ -421,8 +421,15 @@ $(obj)/lib.a: $(lib-y) FORCE # Do not replace $(filter %.o,^) with $(real-prereqs). When a single object # module is turned into a multi object module, $^ will contain header file # dependencies recorded in the .*.cmd file. +ifdef CONFIG_LTO_CLANG +quiet_cmd_link_multi-m = AR [M] $@ +cmd_link_multi-m = \ + rm -f $@; \ + $(AR) cDPrsT $@ $(filter %.o,$^) +else quiet_cmd_link_multi-m = LD [M] $@ cmd_link_multi-m = $(LD) $(ld_flags) -r -o $@ $(filter %.o,$^) +endif $(multi-used-m): FORCE $(call if_changed,link_multi-m) diff --git a/scripts/Makefile.modfinal b/scripts/Makefile.modfinal index d49ec001825d..6de2c35b64e8 100644 --- a/scripts/Makefile.modfinal +++ b/scripts/Makefile.modfinal @@ -30,6 +30,12 @@ quiet_cmd_cc_o_c = CC [M] $@ ARCH_POSTLINK := $(wildcard $(srctree)/arch/$(SRCARCH)/Makefile.postlink) +ifdef CONFIG_LTO_CLANG +# With CONFIG_LTO_CLANG, reuse the object file we compiled for modpost to +# avoid a second slow LTO link +prelink-ext := .lto +endif + quiet_cmd_ld_ko_o = LD [M] $@ cmd_ld_ko_o = \ $(LD) -r $(KBUILD_LDFLAGS) \ @@ -53,8 +59,9 @@ if_changed_except = $(if $(call newer_prereqs_except,$(2))$(cmd-check), \ $(cmd); \ printf '%s\n' 'cmd_$@ := $(make-cmd)' > $(dot-target).cmd, @:) + # Re-generate module BTFs if either module's .ko or vmlinux changed -$(modules): %.ko: %.o %.mod.o scripts/module.lds $(if $(KBUILD_BUILTIN),vmlinux) FORCE +$(modules): %.ko: %$(prelink-ext).o %.mod.o scripts/module.lds $(if $(KBUILD_BUILTIN),vmlinux) FORCE +$(call if_changed_except,ld_ko_o,vmlinux) ifdef CONFIG_DEBUG_INFO_BTF_MODULES +$(if $(newer-prereqs),$(call cmd,btf_ko)) diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost index f54b6ac37ac2..9ff8bfdb574d 100644 --- a/scripts/Makefile.modpost +++ b/scripts/Makefile.modpost @@ -43,6 +43,9 @@ __modpost: include include/config/auto.conf include scripts/Kbuild.include +# for ld_flags +include scripts/Makefile.lib + MODPOST = scripts/mod/modpost \ $(if $(CONFIG_MODVERSIONS),-m) \ $(if $(CONFIG_MODULE_SRCVERSION_ALL),-a) \ @@ -102,12 +105,26 @@ $(input-symdump): @echo >&2 'WARNING: Symbol version dump "$@" is missing.' @echo >&2 ' Modules may not have dependencies or modversions.' +ifdef CONFIG_LTO_CLANG +# With CONFIG_LTO_CLANG, .o files might be LLVM bitcode, so we need to run +# LTO to compile them into native code before running modpost +prelink-ext := .lto + +quiet_cmd_cc_lto_link_modules = LTO [M] $@ +cmd_cc_lto_link_modules = $(LD) $(ld_flags) -r -o $@ --whole-archive $^ + +%.lto.o: %.o + $(call if_changed,cc_lto_link_modules) +endif + +modules := $(sort $(shell cat $(MODORDER))) + # Read out modules.order to pass in modpost. # Otherwise, allmodconfig would fail with "Argument list too long". quiet_cmd_modpost = MODPOST $@ - cmd_modpost = sed 's/ko$$/o/' $< | $(MODPOST) -T - + cmd_modpost = sed 's/\.ko$$/$(prelink-ext)\.o/' $< | $(MODPOST) -T - -$(output-symdump): $(MODORDER) $(input-symdump) FORCE +$(output-symdump): $(MODORDER) $(input-symdump) $(modules:.ko=$(prelink-ext).o) FORCE $(call if_changed,modpost) targets += $(output-symdump) diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh index 6eded325c837..596507573a48 100755 --- a/scripts/link-vmlinux.sh +++ b/scripts/link-vmlinux.sh @@ -56,6 +56,14 @@ modpost_link() ${KBUILD_VMLINUX_LIBS} \ --end-group" + if [ -n "${CONFIG_LTO_CLANG}" ]; then + # This might take a while, so indicate that we're doing + # an LTO link + info LTO ${1} + else + info LD ${1} + fi + ${LD} ${KBUILD_LDFLAGS} -r -o ${1} ${objects} } @@ -103,13 +111,22 @@ vmlinux_link() fi if [ "${SRCARCH}" != "um" ]; then - objects="--whole-archive \ - ${KBUILD_VMLINUX_OBJS} \ - --no-whole-archive \ - --start-group \ - ${KBUILD_VMLINUX_LIBS} \ - --end-group \ - ${@}" + if [ -n "${CONFIG_LTO_CLANG}" ]; then + # Use vmlinux.o instead of performing the slow LTO + # link again. + objects="--whole-archive \ + vmlinux.o \ + --no-whole-archive \ + ${@}" + else + objects="--whole-archive \ + ${KBUILD_VMLINUX_OBJS} \ + --no-whole-archive \ + --start-group \ + ${KBUILD_VMLINUX_LIBS} \ + --end-group \ + ${@}" + fi ${LD} ${KBUILD_LDFLAGS} ${LDFLAGS_vmlinux} \ ${strip_debug#-Wl,} \ @@ -274,7 +291,6 @@ fi; ${MAKE} -f "${srctree}/scripts/Makefile.build" obj=init need-builtin=1 #link vmlinux.o -info LD vmlinux.o modpost_link vmlinux.o objtool_link vmlinux.o -- cgit v1.2.3 From 38e89184900385b0dad1ee55c35ae8abcfee6ece Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Fri, 11 Dec 2020 10:46:20 -0800 Subject: kbuild: lto: fix module versioning With CONFIG_MODVERSIONS, version information is linked into each compilation unit that exports symbols. With LTO, we cannot use this method as all C code is compiled into LLVM bitcode instead. This change collects symbol versions into .symversions files and merges them in link-vmlinux.sh where they are all linked into vmlinux.o at the same time. Signed-off-by: Sami Tolvanen Reviewed-by: Kees Cook Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20201211184633.3213045-4-samitolvanen@google.com --- .gitignore | 1 + Makefile | 3 ++- arch/Kconfig | 1 - scripts/Makefile.build | 33 +++++++++++++++++++++++++++++++-- scripts/Makefile.modpost | 6 +++++- scripts/link-vmlinux.sh | 23 ++++++++++++++++++++++- 6 files changed, 61 insertions(+), 6 deletions(-) (limited to 'scripts') diff --git a/.gitignore b/.gitignore index d01cda8e1177..44e34991875e 100644 --- a/.gitignore +++ b/.gitignore @@ -41,6 +41,7 @@ *.so.dbg *.su *.symtypes +*.symversions *.tab.[ch] *.tar *.xz diff --git a/Makefile b/Makefile index 2b9bcecd6ea1..173a7c9bd1fb 100644 --- a/Makefile +++ b/Makefile @@ -1837,7 +1837,8 @@ clean: $(clean-dirs) -o -name '.tmp_*.o.*' \ -o -name '*.c.[012]*.*' \ -o -name '*.ll' \ - -o -name '*.gcno' \) -type f -print | xargs rm -f + -o -name '*.gcno' \ + -o -name '*.*.symversions' \) -type f -print | xargs rm -f # Generate tags for editors # --------------------------------------------------------------------------- diff --git a/arch/Kconfig b/arch/Kconfig index 9494e3931f4b..4f2f045d288e 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -668,7 +668,6 @@ config HAS_LTO_CLANG depends on !FTRACE_MCOUNT_USE_RECORDMCOUNT depends on !KASAN depends on !GCOV_KERNEL - depends on !MODVERSIONS help The compiler and Kconfig options support building with Clang's LTO. diff --git a/scripts/Makefile.build b/scripts/Makefile.build index 65d4bea937fa..d94fc9ab819d 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -166,6 +166,15 @@ ifdef CONFIG_MODVERSIONS # the actual value of the checksum generated by genksyms # o remove .tmp_.o to .o +ifdef CONFIG_LTO_CLANG +# Generate .o.symversions files for each .o with exported symbols, and link these +# to the kernel and/or modules at the end. +cmd_modversions_c = \ + if $(NM) $@ 2>/dev/null | grep -q __ksymtab; then \ + $(call cmd_gensymtypes_c,$(KBUILD_SYMTYPES),$(@:.o=.symtypes)) \ + > $@.symversions; \ + fi; +else cmd_modversions_c = \ if $(OBJDUMP) -h $@ | grep -q __ksymtab; then \ $(call cmd_gensymtypes_c,$(KBUILD_SYMTYPES),$(@:.o=.symtypes)) \ @@ -177,6 +186,7 @@ cmd_modversions_c = \ rm -f $(@D)/.tmp_$(@F:.o=.ver); \ fi endif +endif ifdef CONFIG_FTRACE_MCOUNT_USE_RECORDMCOUNT # compiler will not generate __mcount_loc use recordmcount or recordmcount.pl @@ -386,6 +396,18 @@ $(obj)/%.asn1.c $(obj)/%.asn1.h: $(src)/%.asn1 $(objtree)/scripts/asn1_compiler $(subdir-builtin): $(obj)/%/built-in.a: $(obj)/% ; $(subdir-modorder): $(obj)/%/modules.order: $(obj)/% ; +# combine symversions for later processing +quiet_cmd_update_lto_symversions = SYMVER $@ +ifeq ($(CONFIG_LTO_CLANG) $(CONFIG_MODVERSIONS),y y) + cmd_update_lto_symversions = \ + rm -f $@.symversions \ + $(foreach n, $(filter-out FORCE,$^), \ + $(if $(wildcard $(n).symversions), \ + ; cat $(n).symversions >> $@.symversions)) +else + cmd_update_lto_symversions = echo >/dev/null +endif + # # Rule to compile a set of .o files into one .a file (without symbol table) # @@ -393,8 +415,11 @@ $(subdir-modorder): $(obj)/%/modules.order: $(obj)/% ; quiet_cmd_ar_builtin = AR $@ cmd_ar_builtin = rm -f $@; $(AR) cDPrST $@ $(real-prereqs) +quiet_cmd_ar_and_symver = AR $@ + cmd_ar_and_symver = $(cmd_update_lto_symversions); $(cmd_ar_builtin) + $(obj)/built-in.a: $(real-obj-y) FORCE - $(call if_changed,ar_builtin) + $(call if_changed,ar_and_symver) # # Rule to create modules.order file @@ -414,8 +439,11 @@ $(obj)/modules.order: $(obj-m) FORCE # # Rule to compile a set of .o files into one .a file (with symbol table) # +quiet_cmd_ar_lib = AR $@ + cmd_ar_lib = $(cmd_update_lto_symversions); $(cmd_ar) + $(obj)/lib.a: $(lib-y) FORCE - $(call if_changed,ar) + $(call if_changed,ar_lib) # NOTE: # Do not replace $(filter %.o,^) with $(real-prereqs). When a single object @@ -424,6 +452,7 @@ $(obj)/lib.a: $(lib-y) FORCE ifdef CONFIG_LTO_CLANG quiet_cmd_link_multi-m = AR [M] $@ cmd_link_multi-m = \ + $(cmd_update_lto_symversions); \ rm -f $@; \ $(AR) cDPrsT $@ $(filter %.o,$^) else diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost index 9ff8bfdb574d..066beffca09a 100644 --- a/scripts/Makefile.modpost +++ b/scripts/Makefile.modpost @@ -111,7 +111,11 @@ ifdef CONFIG_LTO_CLANG prelink-ext := .lto quiet_cmd_cc_lto_link_modules = LTO [M] $@ -cmd_cc_lto_link_modules = $(LD) $(ld_flags) -r -o $@ --whole-archive $^ +cmd_cc_lto_link_modules = \ + $(LD) $(ld_flags) -r -o $@ \ + $(shell [ -s $(@:.lto.o=.o.symversions) ] && \ + echo -T $(@:.lto.o=.o.symversions)) \ + --whole-archive $^ %.lto.o: %.o $(call if_changed,cc_lto_link_modules) diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh index 596507573a48..78e55fe7210b 100755 --- a/scripts/link-vmlinux.sh +++ b/scripts/link-vmlinux.sh @@ -43,11 +43,26 @@ info() fi } +# If CONFIG_LTO_CLANG is selected, collect generated symbol versions into +# .tmp_symversions.lds +gen_symversions() +{ + info GEN .tmp_symversions.lds + rm -f .tmp_symversions.lds + + for o in ${KBUILD_VMLINUX_OBJS} ${KBUILD_VMLINUX_LIBS}; do + if [ -f ${o}.symversions ]; then + cat ${o}.symversions >> .tmp_symversions.lds + fi + done +} + # Link of vmlinux.o used for section mismatch analysis # ${1} output file modpost_link() { local objects + local lds="" objects="--whole-archive \ ${KBUILD_VMLINUX_OBJS} \ @@ -57,6 +72,11 @@ modpost_link() --end-group" if [ -n "${CONFIG_LTO_CLANG}" ]; then + if [ -n "${CONFIG_MODVERSIONS}" ]; then + gen_symversions + lds="${lds} -T .tmp_symversions.lds" + fi + # This might take a while, so indicate that we're doing # an LTO link info LTO ${1} @@ -64,7 +84,7 @@ modpost_link() info LD ${1} fi - ${LD} ${KBUILD_LDFLAGS} -r -o ${1} ${objects} + ${LD} ${KBUILD_LDFLAGS} -r -o ${1} ${lds} ${objects} } objtool_link() @@ -242,6 +262,7 @@ cleanup() { rm -f .btf.* rm -f .tmp_System.map + rm -f .tmp_symversions.lds rm -f .tmp_vmlinux* rm -f System.map rm -f vmlinux -- cgit v1.2.3 From dd2776222abb9893e5b5c237a2c8c880d8854cee Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Fri, 11 Dec 2020 10:46:22 -0800 Subject: kbuild: lto: merge module sections LLD always splits sections with LTO, which increases module sizes. This change adds linker script rules to merge the split sections in the final module. Suggested-by: Nick Desaulniers Signed-off-by: Sami Tolvanen Reviewed-by: Kees Cook Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20201211184633.3213045-6-samitolvanen@google.com --- scripts/module.lds.S | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'scripts') diff --git a/scripts/module.lds.S b/scripts/module.lds.S index 69b9b71a6a47..18d5b8423635 100644 --- a/scripts/module.lds.S +++ b/scripts/module.lds.S @@ -23,6 +23,30 @@ SECTIONS { .init_array 0 : ALIGN(8) { *(SORT(.init_array.*)) *(.init_array) } __jump_table 0 : ALIGN(8) { KEEP(*(__jump_table)) } + + __patchable_function_entries : { *(__patchable_function_entries) } + + /* + * With CONFIG_LTO_CLANG, LLD always enables -fdata-sections and + * -ffunction-sections, which increases the size of the final module. + * Merge the split sections in the final binary. + */ + .bss : { + *(.bss .bss.[0-9a-zA-Z_]*) + *(.bss..L*) + } + + .data : { + *(.data .data.[0-9a-zA-Z_]*) + *(.data..L*) + } + + .rodata : { + *(.rodata .rodata.[0-9a-zA-Z_]*) + *(.rodata..L*) + } + + .text : { *(.text .text.[0-9a-zA-Z_]*) } } /* bring in arch-specific sections */ -- cgit v1.2.3 From fbe078d397b4d59232f05fde977d3b1e7d0c2028 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Fri, 11 Dec 2020 10:46:23 -0800 Subject: kbuild: lto: add a default list of used symbols With CONFIG_LTO_CLANG, LLVM bitcode has not yet been compiled into a binary when the .mod files are generated, which means they don't yet contain references to certain symbols that will be present in the final binaries. This includes intrinsic functions, such as memcpy, memmove, and memset [1], and stack protector symbols [2]. This change adds a default symbol list to use with CONFIG_TRIM_UNUSED_KSYMS when Clang's LTO is used. [1] https://llvm.org/docs/LangRef.html#standard-c-c-library-intrinsics [2] https://llvm.org/docs/LangRef.html#llvm-stackprotector-intrinsic Signed-off-by: Sami Tolvanen Reviewed-by: Kees Cook Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20201211184633.3213045-7-samitolvanen@google.com --- init/Kconfig | 1 + scripts/lto-used-symbollist.txt | 5 +++++ 2 files changed, 6 insertions(+) create mode 100644 scripts/lto-used-symbollist.txt (limited to 'scripts') diff --git a/init/Kconfig b/init/Kconfig index b77c60f8b963..5271138b8742 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -2297,6 +2297,7 @@ config TRIM_UNUSED_KSYMS config UNUSED_KSYMS_WHITELIST string "Whitelist of symbols to keep in ksymtab" depends on TRIM_UNUSED_KSYMS + default "scripts/lto-used-symbollist.txt" if LTO_CLANG help By default, all unused exported symbols will be un-exported from the build when TRIM_UNUSED_KSYMS is selected. diff --git a/scripts/lto-used-symbollist.txt b/scripts/lto-used-symbollist.txt new file mode 100644 index 000000000000..38e7bb9ebaae --- /dev/null +++ b/scripts/lto-used-symbollist.txt @@ -0,0 +1,5 @@ +memcpy +memmove +memset +__stack_chk_fail +__stack_chk_guard -- cgit v1.2.3 From a8cccdd954732a558d481407ab7c3106b89c34ae Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Fri, 11 Dec 2020 10:46:24 -0800 Subject: init: lto: ensure initcall ordering With LTO, the compiler doesn't necessarily obey the link order for initcalls, and initcall variables need globally unique names to avoid collisions at link time. This change exports __KBUILD_MODNAME and adds the initcall_id() macro, which uses it together with __COUNTER__ and __LINE__ to help ensure these variables have unique names, and moves each variable to its own section when LTO is enabled, so the correct order can be specified using a linker script. The generate_initcall_ordering.pl script uses nm to find initcalls from the object files passed to the linker, and generates a linker script that specifies the same order for initcalls that we would have without LTO. With LTO enabled, the script is called in link-vmlinux.sh through jobserver-exec to limit the number of jobs spawned. Signed-off-by: Sami Tolvanen Reviewed-by: Kees Cook Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20201211184633.3213045-8-samitolvanen@google.com --- include/linux/init.h | 52 ++++++- scripts/Makefile.lib | 6 +- scripts/generate_initcall_order.pl | 270 +++++++++++++++++++++++++++++++++++++ scripts/link-vmlinux.sh | 15 +++ 4 files changed, 334 insertions(+), 9 deletions(-) create mode 100755 scripts/generate_initcall_order.pl (limited to 'scripts') diff --git a/include/linux/init.h b/include/linux/init.h index e668832ef66a..a57bf0dfd75f 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -184,19 +184,57 @@ extern bool initcall_debug; * as KEEP() in the linker script. */ +/* Format: ____ */ +#define __initcall_id(fn) \ + __PASTE(__KBUILD_MODNAME, \ + __PASTE(__, \ + __PASTE(__COUNTER__, \ + __PASTE(_, \ + __PASTE(__LINE__, \ + __PASTE(_, fn)))))) + +/* Format: ____ */ +#define __initcall_name(prefix, __iid, id) \ + __PASTE(__, \ + __PASTE(prefix, \ + __PASTE(__, \ + __PASTE(__iid, id)))) + +#ifdef CONFIG_LTO_CLANG +/* + * With LTO, the compiler doesn't necessarily obey link order for + * initcalls. In order to preserve the correct order, we add each + * variable into its own section and generate a linker script (in + * scripts/link-vmlinux.sh) to specify the order of the sections. + */ +#define __initcall_section(__sec, __iid) \ + #__sec ".init.." #__iid +#else +#define __initcall_section(__sec, __iid) \ + #__sec ".init" +#endif + #ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS -#define ___define_initcall(fn, id, __sec) \ +#define ____define_initcall(fn, __name, __sec) \ __ADDRESSABLE(fn) \ - asm(".section \"" #__sec ".init\", \"a\" \n" \ - "__initcall_" #fn #id ": \n" \ + asm(".section \"" __sec "\", \"a\" \n" \ + __stringify(__name) ": \n" \ ".long " #fn " - . \n" \ ".previous \n"); #else -#define ___define_initcall(fn, id, __sec) \ - static initcall_t __initcall_##fn##id __used \ - __attribute__((__section__(#__sec ".init"))) = fn; +#define ____define_initcall(fn, __name, __sec) \ + static initcall_t __name __used \ + __attribute__((__section__(__sec))) = fn; #endif +#define __unique_initcall(fn, id, __sec, __iid) \ + ____define_initcall(fn, \ + __initcall_name(initcall, __iid, id), \ + __initcall_section(__sec, __iid)) + +#define ___define_initcall(fn, id, __sec) \ + __unique_initcall(fn, id, __sec, __initcall_id(fn)) + #define __define_initcall(fn, id) ___define_initcall(fn, id, .initcall##id) /* @@ -236,7 +274,7 @@ extern bool initcall_debug; #define __exitcall(fn) \ static exitcall_t __exitcall_##fn __exit_call = fn -#define console_initcall(fn) ___define_initcall(fn,, .con_initcall) +#define console_initcall(fn) ___define_initcall(fn, con, .con_initcall) struct obs_kernel_param { const char *str; diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index 213677a5ed33..3ff3dbb3a830 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -117,9 +117,11 @@ target-stem = $(basename $(patsubst $(obj)/%,%,$@)) # These flags are needed for modversions and compiling, so we define them here # $(modname_flags) defines KBUILD_MODNAME as the name of the module it will # end up in (or would, if it gets compiled in) -name-fix = $(call stringify,$(subst $(comma),_,$(subst -,_,$1))) +name-fix-token = $(subst $(comma),_,$(subst -,_,$1)) +name-fix = $(call stringify,$(call name-fix-token,$1)) basename_flags = -DKBUILD_BASENAME=$(call name-fix,$(basetarget)) -modname_flags = -DKBUILD_MODNAME=$(call name-fix,$(modname)) +modname_flags = -DKBUILD_MODNAME=$(call name-fix,$(modname)) \ + -D__KBUILD_MODNAME=kmod_$(call name-fix-token,$(modname)) modfile_flags = -DKBUILD_MODFILE=$(call stringify,$(modfile)) _c_flags = $(filter-out $(CFLAGS_REMOVE_$(target-stem).o), \ diff --git a/scripts/generate_initcall_order.pl b/scripts/generate_initcall_order.pl new file mode 100755 index 000000000000..1a88d3f1b913 --- /dev/null +++ b/scripts/generate_initcall_order.pl @@ -0,0 +1,270 @@ +#!/usr/bin/env perl +# SPDX-License-Identifier: GPL-2.0 +# +# Generates a linker script that specifies the correct initcall order. +# +# Copyright (C) 2019 Google LLC + +use strict; +use warnings; +use IO::Handle; +use IO::Select; +use POSIX ":sys_wait_h"; + +my $nm = $ENV{'NM'} || die "$0: ERROR: NM not set?"; +my $objtree = $ENV{'objtree'} || '.'; + +## currently active child processes +my $jobs = {}; # child process pid -> file handle +## results from child processes +my $results = {}; # object index -> [ { level, secname }, ... ] + +## reads _NPROCESSORS_ONLN to determine the maximum number of processes to +## start +sub get_online_processors { + open(my $fh, "getconf _NPROCESSORS_ONLN 2>/dev/null |") + or die "$0: ERROR: failed to execute getconf: $!"; + my $procs = <$fh>; + close($fh); + + if (!($procs =~ /^\d+$/)) { + return 1; + } + + return int($procs); +} + +## writes results to the parent process +## format: +sub write_results { + my ($index, $initcalls) = @_; + + # sort by the counter value to ensure the order of initcalls within + # each object file is correct + foreach my $counter (sort { $a <=> $b } keys(%{$initcalls})) { + my $level = $initcalls->{$counter}->{'level'}; + + # section name for the initcall function + my $secname = $initcalls->{$counter}->{'module'} . '__' . + $counter . '_' . + $initcalls->{$counter}->{'line'} . '_' . + $initcalls->{$counter}->{'function'}; + + print "$index $level $secname\n"; + } +} + +## reads a result line from a child process and adds it to the $results array +sub read_results{ + my ($fh) = @_; + + # each child prints out a full line w/ autoflush and exits after the + # last line, so even if buffered I/O blocks here, it shouldn't block + # very long + my $data = <$fh>; + + if (!defined($data)) { + return 0; + } + + chomp($data); + + my ($index, $level, $secname) = $data =~ + /^(\d+)\ ([^\ ]+)\ (.*)$/; + + if (!defined($index) || + !defined($level) || + !defined($secname)) { + die "$0: ERROR: child process returned invalid data: $data\n"; + } + + $index = int($index); + + if (!exists($results->{$index})) { + $results->{$index} = []; + } + + push (@{$results->{$index}}, { + 'level' => $level, + 'secname' => $secname + }); + + return 1; +} + +## finds initcalls from an object file or all object files in an archive, and +## writes results back to the parent process +sub find_initcalls { + my ($index, $file) = @_; + + die "$0: ERROR: file $file doesn't exist?" if (! -f $file); + + open(my $fh, "\"$nm\" --defined-only \"$file\" 2>/dev/null |") + or die "$0: ERROR: failed to execute \"$nm\": $!"; + + my $initcalls = {}; + + while (<$fh>) { + chomp; + + # check for the start of a new object file (if processing an + # archive) + my ($path)= $_ =~ /^(.+)\:$/; + + if (defined($path)) { + write_results($index, $initcalls); + $initcalls = {}; + next; + } + + # look for an initcall + my ($module, $counter, $line, $symbol) = $_ =~ + /[a-z]\s+__initcall__(\S*)__(\d+)_(\d+)_(.*)$/; + + if (!defined($module)) { + $module = '' + } + + if (!defined($counter) || + !defined($line) || + !defined($symbol)) { + next; + } + + # parse initcall level + my ($function, $level) = $symbol =~ + /^(.*)((early|rootfs|con|[0-9])s?)$/; + + die "$0: ERROR: invalid initcall name $symbol in $file($path)" + if (!defined($function) || !defined($level)); + + $initcalls->{$counter} = { + 'module' => $module, + 'line' => $line, + 'function' => $function, + 'level' => $level, + }; + } + + close($fh); + write_results($index, $initcalls); +} + +## waits for any child process to complete, reads the results, and adds them to +## the $results array for later processing +sub wait_for_results { + my ($select) = @_; + + my $pid = 0; + do { + # unblock children that may have a full write buffer + foreach my $fh ($select->can_read(0)) { + read_results($fh); + } + + # check for children that have exited, read the remaining data + # from them, and clean up + $pid = waitpid(-1, WNOHANG); + if ($pid > 0) { + if (!exists($jobs->{$pid})) { + next; + } + + my $fh = $jobs->{$pid}; + $select->remove($fh); + + while (read_results($fh)) { + # until eof + } + + close($fh); + delete($jobs->{$pid}); + } + } while ($pid > 0); +} + +## forks a child to process each file passed in the command line and collects +## the results +sub process_files { + my $index = 0; + my $njobs = $ENV{'PARALLELISM'} || get_online_processors(); + my $select = IO::Select->new(); + + while (my $file = shift(@ARGV)) { + # fork a child process and read it's stdout + my $pid = open(my $fh, '-|'); + + if (!defined($pid)) { + die "$0: ERROR: failed to fork: $!"; + } elsif ($pid) { + # save the child process pid and the file handle + $select->add($fh); + $jobs->{$pid} = $fh; + } else { + # in the child process + STDOUT->autoflush(1); + find_initcalls($index, "$objtree/$file"); + exit; + } + + $index++; + + # limit the number of children to $njobs + if (scalar(keys(%{$jobs})) >= $njobs) { + wait_for_results($select); + } + } + + # wait for the remaining children to complete + while (scalar(keys(%{$jobs})) > 0) { + wait_for_results($select); + } +} + +sub generate_initcall_lds() { + process_files(); + + my $sections = {}; # level -> [ secname, ...] + + # sort results to retain link order and split to sections per + # initcall level + foreach my $index (sort { $a <=> $b } keys(%{$results})) { + foreach my $result (@{$results->{$index}}) { + my $level = $result->{'level'}; + + if (!exists($sections->{$level})) { + $sections->{$level} = []; + } + + push(@{$sections->{$level}}, $result->{'secname'}); + } + } + + die "$0: ERROR: no initcalls?" if (!keys(%{$sections})); + + # print out a linker script that defines the order of initcalls for + # each level + print "SECTIONS {\n"; + + foreach my $level (sort(keys(%{$sections}))) { + my $section; + + if ($level eq 'con') { + $section = '.con_initcall.init'; + } else { + $section = ".initcall${level}.init"; + } + + print "\t${section} : {\n"; + + foreach my $secname (@{$sections->{$level}}) { + print "\t\t*(${section}..${secname}) ;\n"; + } + + print "\t}\n"; + } + + print "}\n"; +} + +generate_initcall_lds(); diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh index 78e55fe7210b..c5919d5a0b4f 100755 --- a/scripts/link-vmlinux.sh +++ b/scripts/link-vmlinux.sh @@ -43,6 +43,17 @@ info() fi } +# Generate a linker script to ensure correct ordering of initcalls. +gen_initcalls() +{ + info GEN .tmp_initcalls.lds + + ${PYTHON} ${srctree}/scripts/jobserver-exec \ + ${PERL} ${srctree}/scripts/generate_initcall_order.pl \ + ${KBUILD_VMLINUX_OBJS} ${KBUILD_VMLINUX_LIBS} \ + > .tmp_initcalls.lds +} + # If CONFIG_LTO_CLANG is selected, collect generated symbol versions into # .tmp_symversions.lds gen_symversions() @@ -72,6 +83,9 @@ modpost_link() --end-group" if [ -n "${CONFIG_LTO_CLANG}" ]; then + gen_initcalls + lds="-T .tmp_initcalls.lds" + if [ -n "${CONFIG_MODVERSIONS}" ]; then gen_symversions lds="${lds} -T .tmp_symversions.lds" @@ -262,6 +276,7 @@ cleanup() { rm -f .btf.* rm -f .tmp_System.map + rm -f .tmp_initcalls.lds rm -f .tmp_symversions.lds rm -f .tmp_vmlinux* rm -f System.map -- cgit v1.2.3 From 7ac204b545f263c7595a8b5ec3797ae9f9954f82 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Fri, 11 Dec 2020 10:46:27 -0800 Subject: modpost: lto: strip .lto from module names With LTO, everything is compiled into LLVM bitcode, so we have to link each module into native code before modpost. Kbuild uses the .lto.o suffix for these files, which also ends up in module information. This change strips the unnecessary .lto suffix from the module name. Suggested-by: Bill Wendling Signed-off-by: Sami Tolvanen Reviewed-by: Kees Cook Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20201211184633.3213045-11-samitolvanen@google.com --- scripts/mod/modpost.c | 16 +++++++--------- scripts/mod/modpost.h | 9 +++++++++ scripts/mod/sumversion.c | 6 +++++- 3 files changed, 21 insertions(+), 10 deletions(-) (limited to 'scripts') diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index d6c81657d695..be0642126f53 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -17,7 +17,6 @@ #include #include #include -#include #include #include "modpost.h" #include "../../include/linux/license.h" @@ -84,14 +83,6 @@ modpost_log(enum loglevel loglevel, const char *fmt, ...) error_occurred = true; } -static inline bool strends(const char *str, const char *postfix) -{ - if (strlen(str) < strlen(postfix)) - return false; - - return strcmp(str + strlen(str) - strlen(postfix), postfix) == 0; -} - void *do_nofail(void *ptr, const char *expr) { if (!ptr) @@ -1988,6 +1979,10 @@ static char *remove_dot(char *s) size_t m = strspn(s + n + 1, "0123456789"); if (m && (s[n + m] == '.' || s[n + m] == 0)) s[n] = 0; + + /* strip trailing .lto */ + if (strends(s, ".lto")) + s[strlen(s) - 4] = '\0'; } return s; } @@ -2011,6 +2006,9 @@ static void read_symbols(const char *modname) /* strip trailing .o */ tmp = NOFAIL(strdup(modname)); tmp[strlen(tmp) - 2] = '\0'; + /* strip trailing .lto */ + if (strends(tmp, ".lto")) + tmp[strlen(tmp) - 4] = '\0'; mod = new_module(tmp); free(tmp); } diff --git a/scripts/mod/modpost.h b/scripts/mod/modpost.h index e6f46eee0af0..1f76eea7cfde 100644 --- a/scripts/mod/modpost.h +++ b/scripts/mod/modpost.h @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -180,6 +181,14 @@ static inline unsigned int get_secindex(const struct elf_info *info, return info->symtab_shndx_start[sym - info->symtab_start]; } +static inline bool strends(const char *str, const char *postfix) +{ + if (strlen(str) < strlen(postfix)) + return false; + + return strcmp(str + strlen(str) - strlen(postfix), postfix) == 0; +} + /* file2alias.c */ extern unsigned int cross_build; void handle_moddevtable(struct module *mod, struct elf_info *info, diff --git a/scripts/mod/sumversion.c b/scripts/mod/sumversion.c index d587f40f1117..760e6baa7eda 100644 --- a/scripts/mod/sumversion.c +++ b/scripts/mod/sumversion.c @@ -391,10 +391,14 @@ void get_src_version(const char *modname, char sum[], unsigned sumlen) struct md4_ctx md; char *fname; char filelist[PATH_MAX + 1]; + int postfix_len = 1; + + if (strends(modname, ".lto.o")) + postfix_len = 5; /* objects for a module are listed in the first line of *.mod file. */ snprintf(filelist, sizeof(filelist), "%.*smod", - (int)strlen(modname) - 1, modname); + (int)strlen(modname) - postfix_len, modname); buf = read_text_file(filelist); -- cgit v1.2.3 From d23dddf86a12c96d9a3d135a567b9716e5b47821 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Fri, 11 Dec 2020 10:46:28 -0800 Subject: scripts/mod: disable LTO for empty.c With CONFIG_LTO_CLANG, clang generates LLVM IR instead of ELF object files. As empty.o is used for probing target properties, disable LTO for it to produce an object file instead. Signed-off-by: Sami Tolvanen Reviewed-by: Kees Cook Signed-off-by: Kees Cook Link: https://lore.kernel.org/r/20201211184633.3213045-12-samitolvanen@google.com --- scripts/mod/Makefile | 1 + 1 file changed, 1 insertion(+) (limited to 'scripts') diff --git a/scripts/mod/Makefile b/scripts/mod/Makefile index 78071681d924..c9e38ad937fd 100644 --- a/scripts/mod/Makefile +++ b/scripts/mod/Makefile @@ -1,5 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 OBJECT_FILES_NON_STANDARD := y +CFLAGS_REMOVE_empty.o += $(CC_FLAGS_LTO) hostprogs-always-y += modpost mk_elfconfig always-y += empty.o -- cgit v1.2.3