Message ID | 20250210142647.083ff456@gandalf.local.home (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | arm64: scripts/sorttable: Implement sorting mcount_loc at boot for arm64 | expand |
Hi Steven, kernel test robot noticed the following build warnings: [auto build test WARNING on linus/master] [also build test WARNING on v6.14-rc2 next-20250210] [cannot apply to arm64/for-next/core arm-perf/for-next/perf kvmarm/next soc/for-next arm/for-next arm/fixes] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch#_base_tree_information] url: https://github.com/intel-lab-lkp/linux/commits/Steven-Rostedt/arm64-scripts-sorttable-Implement-sorting-mcount_loc-at-boot-for-arm64/20250211-032956 base: linus/master patch link: https://lore.kernel.org/r/20250210142647.083ff456%40gandalf.local.home patch subject: [PATCH] arm64: scripts/sorttable: Implement sorting mcount_loc at boot for arm64 config: riscv-randconfig-001-20250211 (https://download.01.org/0day-ci/archive/20250211/202502111626.K7CTghCR-lkp@intel.com/config) compiler: riscv64-linux-gcc (GCC) 14.2.0 reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20250211/202502111626.K7CTghCR-lkp@intel.com/reproduce) If you fix the issue in a separate patch/commit (i.e. not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot <lkp@intel.com> | Closes: https://lore.kernel.org/oe-kbuild-all/202502111626.K7CTghCR-lkp@intel.com/ All warnings (new ones prefixed by >>): >> scripts/sorttable.c:306:13: warning: 'rela_write_addend' defined but not used [-Wunused-function] 306 | static void rela_write_addend(Elf_Rela *rela, uint64_t val) | ^~~~~~~~~~~~~~~~~ >> scripts/sorttable.c:287:17: warning: 'rela_addend' defined but not used [-Wunused-function] 287 | static uint64_t rela_##fn_name(Elf_Rela *rela) \ | ^~~~~ scripts/sorttable.c:294:1: note: in expansion of macro 'RELA_ADDR' 294 | RELA_ADDR(addend) | ^~~~~~~~~ >> scripts/sorttable.c:287:17: warning: 'rela_info' defined but not used [-Wunused-function] 287 | static uint64_t rela_##fn_name(Elf_Rela *rela) \ | ^~~~~ scripts/sorttable.c:293:1: note: in expansion of macro 'RELA_ADDR' 293 | RELA_ADDR(info) | ^~~~~~~~~ >> scripts/sorttable.c:287:17: warning: 'rela_offset' defined but not used [-Wunused-function] 287 | static uint64_t rela_##fn_name(Elf_Rela *rela) \ | ^~~~~ scripts/sorttable.c:292:1: note: in expansion of macro 'RELA_ADDR' 292 | RELA_ADDR(offset) | ^~~~~~~~~ In file included from arch/riscv/kernel/asm-offsets.c:10: include/linux/ftrace.h: In function 'ftrace_get_regs': include/linux/ftrace.h:190:16: error: implicit declaration of function 'arch_ftrace_get_regs'; did you mean 'arch_ftrace_regs'? [-Wimplicit-function-declaration] 190 | return arch_ftrace_get_regs(fregs); | ^~~~~~~~~~~~~~~~~~~~ | arch_ftrace_regs include/linux/ftrace.h:190:16: error: returning 'int' from a function with return type 'struct pt_regs *' makes pointer from integer without a cast [-Wint-conversion] 190 | return arch_ftrace_get_regs(fregs); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~ make[3]: *** [scripts/Makefile.build:102: arch/riscv/kernel/asm-offsets.s] Error 1 shuffle=1925395641 make[3]: Target 'prepare' not remade because of errors. make[2]: *** [Makefile:1264: prepare0] Error 2 shuffle=1925395641 make[2]: Target 'prepare' not remade because of errors. make[1]: *** [Makefile:251: __sub-make] Error 2 shuffle=1925395641 make[1]: Target 'prepare' not remade because of errors. make: *** [Makefile:251: __sub-make] Error 2 shuffle=1925395641 make: Target 'prepare' not remade because of errors. vim +/rela_write_addend +306 scripts/sorttable.c 275 276 #define RELA_ADDR(fn_name) \ 277 static uint64_t rela64_##fn_name(Elf_Rela *rela) \ 278 { \ 279 return r8((uint64_t *)&rela->e64.r_##fn_name); \ 280 } \ 281 \ 282 static uint64_t rela32_##fn_name(Elf_Rela *rela) \ 283 { \ 284 return r((uint32_t *)&rela->e32.r_##fn_name); \ 285 } \ 286 \ > 287 static uint64_t rela_##fn_name(Elf_Rela *rela) \ 288 { \ 289 return e.rela_##fn_name(rela); \ 290 } 291 292 RELA_ADDR(offset) 293 RELA_ADDR(info) 294 RELA_ADDR(addend) 295 296 static void rela64_write_addend(Elf_Rela *rela, uint64_t val) 297 { 298 w8(val, (uint64_t *)&rela->e64.r_addend); 299 } 300 301 static void rela32_write_addend(Elf_Rela *rela, uint64_t val) 302 { 303 w(val, (uint32_t *)&rela->e32.r_addend); 304 } 305 > 306 static void rela_write_addend(Elf_Rela *rela, uint64_t val) 307 { 308 e.rela_write_addend(rela, val); 309 } 310
On Tue, 11 Feb 2025 17:07:13 +0800 kernel test robot <lkp@intel.com> wrote: > All warnings (new ones prefixed by >>): > > >> scripts/sorttable.c:306:13: warning: 'rela_write_addend' defined but not used [-Wunused-function] > 306 | static void rela_write_addend(Elf_Rela *rela, uint64_t val) > | ^~~~~~~~~~~~~~~~~ > >> scripts/sorttable.c:287:17: warning: 'rela_addend' defined but not used [-Wunused-function] > 287 | static uint64_t rela_##fn_name(Elf_Rela *rela) \ > | ^~~~~ > scripts/sorttable.c:294:1: note: in expansion of macro 'RELA_ADDR' > 294 | RELA_ADDR(addend) > | ^~~~~~~~~ The above isn't used if sorting isn't enabled. Looks like I'll have to add some "__maybe_unused" attributes. -- Steve
On Mon, Feb 10, 2025 at 02:26:47PM -0500, Steven Rostedt wrote: > For the s390 folks. I cross compiled a s390 and looked at the mcount_loc > section, and I have no idea how to implement this for that. I wrote a elf > parser to dump sections based symbols: > > https://rostedt.org/code/dump_elf_sym.c > > And ran it on the s390 vmlinux: > > $ ./dump_elf_sym vmlinux __start_mcount_loc __stop_mcount_loc > 1801620: .. .. .. .. .. .. .. .. 00 00 00 00 00 11 70 20 ......... .....p > 1801630: 00 00 00 00 00 11 70 90 00 00 00 00 00 11 70 a0 ......p.. .....p. > 1801640: 00 00 00 00 00 11 71 10 00 00 00 00 00 11 71 20 ......q.. .....q > 1801650: 00 00 00 00 00 11 71 90 00 00 00 00 01 7c 70 00 ......q.. ....|p. > 1801660: 00 00 00 00 01 7c 70 20 00 00 00 00 01 7c 70 40 .....|p . ....|p@ > 1801670: 00 00 00 00 01 7c 70 60 00 00 00 00 01 7c 70 70 .....|p`. ....|pp > 1801680: 00 00 00 00 01 7c 70 98 00 00 00 00 01 7c 70 c0 .....|p.. ....|p. > 1801690: 00 00 00 00 01 7c 70 d0 00 00 00 00 01 7c 71 68 .....|p.. ....|qh > [..] > > It looks like addresses in that section... Those are the addresses of the mcount locations. After looking at sorttable.c it really looks like that for s390 we can simply select HAVE_BUILDTIME_MCOUNT_SORT without any further changes. I just tested it with different compiler options (fentry vs hotpatch), including selecting FTRACE_SORT_STARTUP_TEST, and as expected everything works. I'm going to give it some more testing in our CI - but if nothing breaks a patch which selects HAVE_BUILDTIME_MCOUNT_SORT for s390 will go upstream with the next merge window.
On Thu, Feb 13, 2025 at 01:53:02PM +0100, Heiko Carstens wrote: > On Mon, Feb 10, 2025 at 02:26:47PM -0500, Steven Rostedt wrote: > > For the s390 folks. I cross compiled a s390 and looked at the mcount_loc > > section, and I have no idea how to implement this for that. I wrote a elf > > parser to dump sections based symbols: > > > > https://rostedt.org/code/dump_elf_sym.c > > > > And ran it on the s390 vmlinux: > > > > $ ./dump_elf_sym vmlinux __start_mcount_loc __stop_mcount_loc > > 1801620: .. .. .. .. .. .. .. .. 00 00 00 00 00 11 70 20 ......... .....p > > 1801630: 00 00 00 00 00 11 70 90 00 00 00 00 00 11 70 a0 ......p.. .....p. > > 1801640: 00 00 00 00 00 11 71 10 00 00 00 00 00 11 71 20 ......q.. .....q > > 1801650: 00 00 00 00 00 11 71 90 00 00 00 00 01 7c 70 00 ......q.. ....|p. > > 1801660: 00 00 00 00 01 7c 70 20 00 00 00 00 01 7c 70 40 .....|p . ....|p@ > > 1801670: 00 00 00 00 01 7c 70 60 00 00 00 00 01 7c 70 70 .....|p`. ....|pp > > 1801680: 00 00 00 00 01 7c 70 98 00 00 00 00 01 7c 70 c0 .....|p.. ....|p. > > 1801690: 00 00 00 00 01 7c 70 d0 00 00 00 00 01 7c 71 68 .....|p.. ....|qh > > [..] > > > > It looks like addresses in that section... > > Those are the addresses of the mcount locations. After looking at > sorttable.c it really looks like that for s390 we can simply select > HAVE_BUILDTIME_MCOUNT_SORT without any further changes. > > I just tested it with different compiler options (fentry vs hotpatch), > including selecting FTRACE_SORT_STARTUP_TEST, and as expected everything > works. > > I'm going to give it some more testing in our CI - but if nothing breaks a > patch which selects HAVE_BUILDTIME_MCOUNT_SORT for s390 will go upstream > with the next merge window. Something like this: From 0759d6b961946b7e5cfb19971d56f5493204301c Mon Sep 17 00:00:00 2001 From: Heiko Carstens <hca@linux.ibm.com> Date: Thu, 13 Feb 2025 13:57:33 +0100 Subject: [PATCH] s390: Sort mcount locations at build time For s390 the mcount_loc section of the kernel image contains the addresses of the mcount locations. All addresses will be adjusted with the same offset by the decompressor before the kernel is started. Therefore select HAVE_BUILDTIME_MCOUNT_SORT so that the entries of this section are sorted at build time. Given that the same offset is applied to all entries the section will be sorted in any case. Note that this was not possible before commit 778666df60f0 ("s390: compile relocatable kernel without -fPIE"). Since this commit all R_390_64 absolute relocations are handled in a special way: only the address of the to be changed location is put into a special section. For all those locations the same offset is applied as described above. Without that change it would have been necessary to also adjust the addend of all relocations which correspond to the mcount_loc section, when sorting the mcount_loc section. Reported-by: Steven Rostedt <rostedt@goodmis.org> Closes: https://lore.kernel.org/r/20250210142647.083ff456@gandalf.local.home/ Signed-off-by: Heiko Carstens <hca@linux.ibm.com> --- arch/s390/Kconfig | 1 + arch/s390/configs/debug_defconfig | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 9c9ec08d78c7..acaa1d1c12b2 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -185,6 +185,7 @@ config S390 select HAVE_ARCH_TRANSPARENT_HUGEPAGE select HAVE_ARCH_VMAP_STACK select HAVE_ASM_MODVERSIONS + select HAVE_BUILDTIME_MCOUNT_SORT select HAVE_CMPXCHG_DOUBLE select HAVE_CMPXCHG_LOCAL select HAVE_DEBUG_KMEMLEAK diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index d6beec5292a0..a2b0444b7d6b 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -887,6 +887,7 @@ CONFIG_USER_EVENTS=y CONFIG_HIST_TRIGGERS=y CONFIG_FTRACE_STARTUP_TEST=y # CONFIG_EVENT_TRACE_STARTUP_TEST is not set +CONFIG_FTRACE_SORT_STARTUP_TEST=y CONFIG_SAMPLES=y CONFIG_SAMPLE_TRACE_PRINTK=m CONFIG_SAMPLE_FTRACE_DIRECT=m
On Thu, 13 Feb 2025 16:09:34 +0100
Heiko Carstens <hca@linux.ibm.com> wrote:
> Something like this:
Thanks,
I'm about to send a bunch of patches to this code that removes weak
functions. Since s390 is big-endian, it would be good for you to test them
too. I'll Cc you on them. I think I handled the endianess correctly.
Although I found a bug where if you compile le arm64 on s390 it will break
because it assumed that the endianess of the build machine was the same as
the target.
I'll be sending them shortly. Just doing some more smoke tests.
-- Steve
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index fcdd0ed3eca8..3c6c9dcd96aa 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -217,6 +217,7 @@ config ARM64 if DYNAMIC_FTRACE_WITH_ARGS select HAVE_SAMPLE_FTRACE_DIRECT select HAVE_SAMPLE_FTRACE_DIRECT_MULTI + select HAVE_BUILDTIME_MCOUNT_SORT select HAVE_EFFICIENT_UNALIGNED_ACCESS select HAVE_GUP_FAST select HAVE_FTRACE_GRAPH_FUNC diff --git a/scripts/sorttable.c b/scripts/sorttable.c index 9f41575afd7a..f75be83916a8 100644 --- a/scripts/sorttable.c +++ b/scripts/sorttable.c @@ -28,6 +28,7 @@ #include <fcntl.h> #include <stdio.h> #include <stdlib.h> +#include <stdbool.h> #include <string.h> #include <unistd.h> #include <errno.h> @@ -79,10 +80,16 @@ typedef union { Elf64_Sym e64; } Elf_Sym; +typedef union { + Elf32_Rela e32; + Elf64_Rela e64; +} Elf_Rela; + static uint32_t (*r)(const uint32_t *); static uint16_t (*r2)(const uint16_t *); static uint64_t (*r8)(const uint64_t *); static void (*w)(uint32_t, uint32_t *); +static void (*w8)(uint64_t, uint64_t *); typedef void (*table_sort_t)(char *, int); static struct elf_funcs { @@ -102,6 +109,10 @@ static struct elf_funcs { uint32_t (*sym_name)(Elf_Sym *sym); uint64_t (*sym_value)(Elf_Sym *sym); uint16_t (*sym_shndx)(Elf_Sym *sym); + uint64_t (*rela_offset)(Elf_Rela *rela); + uint64_t (*rela_info)(Elf_Rela *rela); + uint64_t (*rela_addend)(Elf_Rela *rela); + void (*rela_write_addend)(Elf_Rela *rela, uint64_t val); } e; static uint64_t ehdr64_shoff(Elf_Ehdr *ehdr) @@ -262,6 +273,41 @@ SYM_ADDR(value) SYM_WORD(name) SYM_HALF(shndx) +#define RELA_ADDR(fn_name) \ +static uint64_t rela64_##fn_name(Elf_Rela *rela) \ +{ \ + return r8((uint64_t *)&rela->e64.r_##fn_name); \ +} \ + \ +static uint64_t rela32_##fn_name(Elf_Rela *rela) \ +{ \ + return r((uint32_t *)&rela->e32.r_##fn_name); \ +} \ + \ +static uint64_t rela_##fn_name(Elf_Rela *rela) \ +{ \ + return e.rela_##fn_name(rela); \ +} + +RELA_ADDR(offset) +RELA_ADDR(info) +RELA_ADDR(addend) + +static void rela64_write_addend(Elf_Rela *rela, uint64_t val) +{ + w8(val, (uint64_t *)&rela->e64.r_addend); +} + +static void rela32_write_addend(Elf_Rela *rela, uint64_t val) +{ + w(val, (uint32_t *)&rela->e32.r_addend); +} + +static void rela_write_addend(Elf_Rela *rela, uint64_t val) +{ + e.rela_write_addend(rela, val); +} + /* * Get the whole file as a programming convenience in order to avoid * malloc+lseek+read+free of many pieces. If successful, then mmap @@ -341,6 +387,16 @@ static void wle(uint32_t val, uint32_t *x) put_unaligned_le32(val, x); } +static void w8be(uint64_t val, uint64_t *x) +{ + put_unaligned_be64(val, x); +} + +static void w8le(uint64_t val, uint64_t *x) +{ + put_unaligned_le64(val, x); +} + /* * Move reserved section indices SHN_LORESERVE..SHN_HIRESERVE out of * the way to -256..-1, to avoid conflicting with real section @@ -398,13 +454,12 @@ static inline void *get_index(void *start, int entsize, int index) static int extable_ent_size; static int long_size; +#define ERRSTR_MAXSZ 256 #ifdef UNWINDER_ORC_ENABLED /* ORC unwinder only support X86_64 */ #include <asm/orc_types.h> -#define ERRSTR_MAXSZ 256 - static char g_err[ERRSTR_MAXSZ]; static int *g_orc_ip_table; static struct orc_entry *g_orc_table; @@ -500,6 +555,11 @@ static void *sort_orctable(void *arg) #ifdef MCOUNT_SORT_ENABLED static pthread_t mcount_sort_thread; +static bool sort_reloc; + +static long rela_type; + +static char m_err[ERRSTR_MAXSZ]; struct elf_mcount_loc { Elf_Ehdr *ehdr; @@ -508,6 +568,103 @@ struct elf_mcount_loc { uint64_t stop_mcount_loc; }; +/* Sort the relocations not the address itself */ +static void *sort_relocs(Elf_Ehdr *ehdr, uint64_t start_loc, uint64_t size) +{ + Elf_Shdr *shdr_start; + Elf_Rela *rel; + unsigned int shnum; + unsigned int count; + int shentsize; + void *vals; + void *ptr; + + shdr_start = (Elf_Shdr *)((char *)ehdr + ehdr_shoff(ehdr)); + shentsize = ehdr_shentsize(ehdr); + + vals = malloc(long_size * size); + if (!vals) { + snprintf(m_err, ERRSTR_MAXSZ, "Failed to allocate sort array"); + pthread_exit(m_err); + return NULL; + } + + ptr = vals; + + shnum = ehdr_shnum(ehdr); + if (shnum == SHN_UNDEF) + shnum = shdr_size(shdr_start); + + for (int i = 0; i < shnum; i++) { + Elf_Shdr *shdr = get_index(shdr_start, shentsize, i); + void *end; + + if (shdr_type(shdr) != SHT_RELA) + continue; + + rel = (void *)ehdr + shdr_offset(shdr); + end = (void *)rel + shdr_size(shdr); + + for (; (void *)rel < end; rel = (void *)rel + shdr_entsize(shdr)) { + uint64_t offset = rela_offset(rel); + + if (offset >= start_loc && offset < start_loc + size) { + if (ptr + long_size > vals + size) { + free(vals); + snprintf(m_err, ERRSTR_MAXSZ, + "Too many relocations"); + pthread_exit(m_err); + return NULL; + } + + /* Make sure this has the correct type */ + if (rela_info(rel) != rela_type) { + free(vals); + snprintf(m_err, ERRSTR_MAXSZ, + "rela has type %lx but expected %lx\n", + (long)rela_info(rel), rela_type); + pthread_exit(m_err); + return NULL; + } + + if (long_size == 4) + *(uint32_t *)ptr = rela_addend(rel); + else + *(uint64_t *)ptr = rela_addend(rel); + ptr += long_size; + } + } + } + count = ptr - vals; + qsort(vals, count / long_size, long_size, compare_extable); + + ptr = vals; + for (int i = 0; i < shnum; i++) { + Elf_Shdr *shdr = get_index(shdr_start, shentsize, i); + void *end; + + if (shdr_type(shdr) != SHT_RELA) + continue; + + rel = (void *)ehdr + shdr_offset(shdr); + end = (void *)rel + shdr_size(shdr); + + for (; (void *)rel < end; rel = (void *)rel + shdr_entsize(shdr)) { + uint64_t offset = rela_offset(rel); + + if (offset >= start_loc && offset < start_loc + size) { + if (long_size == 4) + rela_write_addend(rel, *(uint32_t *)ptr); + else + rela_write_addend(rel, *(uint64_t *)ptr); + ptr += long_size; + } + } + } + free(vals); + return NULL; +} + /* Sort the addresses stored between __start_mcount_loc to __stop_mcount_loc in vmlinux */ static void *sort_mcount_loc(void *arg) { @@ -517,6 +674,9 @@ static void *sort_mcount_loc(void *arg) uint64_t count = emloc->stop_mcount_loc - emloc->start_mcount_loc; unsigned char *start_loc = (void *)emloc->ehdr + offset; + if (sort_reloc) + return sort_relocs(emloc->ehdr, emloc->start_mcount_loc, count); + qsort(start_loc, count/long_size, long_size, compare_extable); return NULL; } @@ -866,12 +1026,14 @@ static int do_file(char const *const fname, void *addr) r2 = r2le; r8 = r8le; w = wle; + w8 = w8le; break; case ELFDATA2MSB: r = rbe; r2 = r2be; r8 = r8be; w = wbe; + w8 = w8be; break; default: fprintf(stderr, "unrecognized ELF data encoding %d: %s\n", @@ -887,8 +1049,13 @@ static int do_file(char const *const fname, void *addr) } switch (r2(&ehdr->e32.e_machine)) { - case EM_386: case EM_AARCH64: +#ifdef MCOUNT_SORT_ENABLED + sort_reloc = true; + rela_type = 0x403; +#endif + /* fallthrough */ + case EM_386: case EM_LOONGARCH: case EM_RISCV: case EM_S390: @@ -932,6 +1099,10 @@ static int do_file(char const *const fname, void *addr) .sym_name = sym32_name, .sym_value = sym32_value, .sym_shndx = sym32_shndx, + .rela_offset = rela32_offset, + .rela_info = rela32_info, + .rela_addend = rela32_addend, + .rela_write_addend = rela32_write_addend, }; e = efuncs; @@ -965,6 +1136,10 @@ static int do_file(char const *const fname, void *addr) .sym_name = sym64_name, .sym_value = sym64_value, .sym_shndx = sym64_shndx, + .rela_offset = rela64_offset, + .rela_info = rela64_info, + .rela_addend = rela64_addend, + .rela_write_addend = rela64_write_addend, }; e = efuncs;