diff mbox series

modpost: Optimize symbol search from linear to binary search

Message ID 20230918210631.3882376-1-jbrennen@google.com (mailing list archive)
State New, archived
Headers show
Series modpost: Optimize symbol search from linear to binary search | expand

Commit Message

Jack Brennen Sept. 18, 2023, 9:06 p.m. UTC
Modify modpost to use binary search for converting addresses back
into symbol references.  Previously it used linear search.

This change saves a few seconds of wall time for defconfig builds,
but can save several minutes on allyesconfigs.

Before:
$ make LLVM=1 -j128 allyesconfig vmlinux -s KCFLAGS="-Wno-error"
        Elapsed (wall clock) time (h:mm:ss or m:ss): 13:30.31

After:
$ make LLVM=1 -j128 allyesconfig vmlinux -s KCFLAGS="-Wno-error"
        Elapsed (wall clock) time (h:mm:ss or m:ss): 11:43.43

Signed-off-by: Jack Brennen <jbrennen@google.com>
Tested-by: Nick Desaulniers <ndesaulniers@google.com>
---
 scripts/mod/Makefile    |   4 +-
 scripts/mod/modpost.c   |  60 +----------
 scripts/mod/modpost.h   |  25 +++++
 scripts/mod/symsearch.c | 233 ++++++++++++++++++++++++++++++++++++++++
 4 files changed, 265 insertions(+), 57 deletions(-)
 create mode 100644 scripts/mod/symsearch.c

Comments

Nick Desaulniers Sept. 19, 2023, 7:40 p.m. UTC | #1
On Mon, Sep 18, 2023 at 2:06 PM Jack Brennen <jbrennen@google.com> wrote:
>
> Modify modpost to use binary search for converting addresses back
> into symbol references.  Previously it used linear search.
>
> This change saves a few seconds of wall time for defconfig builds,
> but can save several minutes on allyesconfigs.
>
> Before:
> $ make LLVM=1 -j128 allyesconfig vmlinux -s KCFLAGS="-Wno-error"
>         Elapsed (wall clock) time (h:mm:ss or m:ss): 13:30.31
>
> After:
> $ make LLVM=1 -j128 allyesconfig vmlinux -s KCFLAGS="-Wno-error"
>         Elapsed (wall clock) time (h:mm:ss or m:ss): 11:43.43
>
> Signed-off-by: Jack Brennen <jbrennen@google.com>
> Tested-by: Nick Desaulniers <ndesaulniers@google.com>

Jack, if this is your first kernel patch, it's a nice one! And
welcome!  Thanks for your work on this.

Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>

Some ideas for future improvement.

I noticed we make 2 trips through the symbol table checking
is_sym_searchable twice.  We could probably skip one trip and just
malloc the full size of the symbol table, even if this is technically
larger than absolutely needed.  Guessing that might save us repeated
strlen calls on every symbol in the table.

Also, if qsort shows up in any profile, it's pretty well known why
std::sort beats qsort; there's potential for more speed there, but I
didn't profile so IDK.

> ---
>  scripts/mod/Makefile    |   4 +-
>  scripts/mod/modpost.c   |  60 +----------
>  scripts/mod/modpost.h   |  25 +++++
>  scripts/mod/symsearch.c | 233 ++++++++++++++++++++++++++++++++++++++++
>  4 files changed, 265 insertions(+), 57 deletions(-)
>  create mode 100644 scripts/mod/symsearch.c
>
> diff --git a/scripts/mod/Makefile b/scripts/mod/Makefile
> index c9e38ad937fd..3c54125eb373 100644
> --- a/scripts/mod/Makefile
> +++ b/scripts/mod/Makefile
> @@ -5,7 +5,7 @@ CFLAGS_REMOVE_empty.o += $(CC_FLAGS_LTO)
>  hostprogs-always-y     += modpost mk_elfconfig
>  always-y               += empty.o
>
> -modpost-objs   := modpost.o file2alias.o sumversion.o
> +modpost-objs   := modpost.o file2alias.o sumversion.o symsearch.o
>
>  devicetable-offsets-file := devicetable-offsets.h
>
> @@ -16,7 +16,7 @@ targets += $(devicetable-offsets-file) devicetable-offsets.s
>
>  # dependencies on generated files need to be listed explicitly
>
> -$(obj)/modpost.o $(obj)/file2alias.o $(obj)/sumversion.o: $(obj)/elfconfig.h
> +$(obj)/modpost.o $(obj)/file2alias.o $(obj)/sumversion.o $(obj)/symsearch.o: $(obj)/elfconfig.h
>  $(obj)/file2alias.o: $(obj)/$(devicetable-offsets-file)
>
>  quiet_cmd_elfconfig = MKELF   $@
> diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
> index de499dce5265..975f235aca2c 100644
> --- a/scripts/mod/modpost.c
> +++ b/scripts/mod/modpost.c
> @@ -22,7 +22,6 @@
>  #include <errno.h>
>  #include "modpost.h"
>  #include "../../include/linux/license.h"
> -#include "../../include/linux/module_symbol.h"
>
>  static bool module_enabled;
>  /* Are we using CONFIG_MODVERSIONS? */
> @@ -577,11 +576,14 @@ static int parse_elf(struct elf_info *info, const char *filename)
>                         *p = TO_NATIVE(*p);
>         }
>
> +       symsearch_init(info);
> +
>         return 1;
>  }
>
>  static void parse_elf_finish(struct elf_info *info)
>  {
> +       symsearch_finish(info);
>         release_file(info->hdr, info->size);
>  }
>
> @@ -1039,65 +1041,13 @@ static int secref_whitelist(const char *fromsec, const char *fromsym,
>         return 1;
>  }
>
> -/*
> - * If there's no name there, ignore it; likewise, ignore it if it's
> - * one of the magic symbols emitted used by current tools.
> - *
> - * Otherwise if find_symbols_between() returns those symbols, they'll
> - * fail the whitelist tests and cause lots of false alarms ... fixable
> - * only by merging __exit and __init sections into __text, bloating
> - * the kernel (which is especially evil on embedded platforms).
> - */
> -static inline int is_valid_name(struct elf_info *elf, Elf_Sym *sym)
> -{
> -       const char *name = elf->strtab + sym->st_name;
> -
> -       if (!name || !strlen(name))
> -               return 0;
> -       return !is_mapping_symbol(name);
> -}
> -
>  /* Look up the nearest symbol based on the section and the address */
>  static Elf_Sym *find_nearest_sym(struct elf_info *elf, Elf_Addr addr,
>                                  unsigned int secndx, bool allow_negative,
>                                  Elf_Addr min_distance)
>  {
> -       Elf_Sym *sym;
> -       Elf_Sym *near = NULL;
> -       Elf_Addr sym_addr, distance;
> -       bool is_arm = (elf->hdr->e_machine == EM_ARM);
> -
> -       for (sym = elf->symtab_start; sym < elf->symtab_stop; sym++) {
> -               if (get_secindex(elf, sym) != secndx)
> -                       continue;
> -               if (!is_valid_name(elf, sym))
> -                       continue;
> -
> -               sym_addr = sym->st_value;
> -
> -               /*
> -                * For ARM Thumb instruction, the bit 0 of st_value is set
> -                * if the symbol is STT_FUNC type. Mask it to get the address.
> -                */
> -               if (is_arm && ELF_ST_TYPE(sym->st_info) == STT_FUNC)
> -                        sym_addr &= ~1;
> -
> -               if (addr >= sym_addr)
> -                       distance = addr - sym_addr;
> -               else if (allow_negative)
> -                       distance = sym_addr - addr;
> -               else
> -                       continue;
> -
> -               if (distance <= min_distance) {
> -                       min_distance = distance;
> -                       near = sym;
> -               }
> -
> -               if (min_distance == 0)
> -                       break;
> -       }
> -       return near;
> +       return symsearch_find_nearest(elf, addr, secndx,
> +                                     allow_negative, min_distance);
>  }
>
>  static Elf_Sym *find_fromsym(struct elf_info *elf, Elf_Addr addr,
> diff --git a/scripts/mod/modpost.h b/scripts/mod/modpost.h
> index 5f94c2c9f2d9..6413f26fcb6b 100644
> --- a/scripts/mod/modpost.h
> +++ b/scripts/mod/modpost.h
> @@ -10,6 +10,7 @@
>  #include <fcntl.h>
>  #include <unistd.h>
>  #include <elf.h>
> +#include "../../include/linux/module_symbol.h"
>
>  #include "list.h"
>  #include "elfconfig.h"
> @@ -128,6 +129,8 @@ struct elf_info {
>          * take shndx from symtab_shndx_start[N] instead */
>         Elf32_Word   *symtab_shndx_start;
>         Elf32_Word   *symtab_shndx_stop;
> +
> +       struct symsearch *symsearch;
>  };
>
>  /* Accessor for sym->st_shndx, hides ugliness of "64k sections" */
> @@ -154,6 +157,28 @@ static inline unsigned int get_secindex(const struct elf_info *info,
>         return index;
>  }
>
> +/*
> + * If there's no name there, ignore it; likewise, ignore it if it's
> + * one of the magic symbols emitted used by current tools.
> + *
> + * Internal symbols created by tools should be ignored by modpost.
> + */
> +static inline int is_valid_name(struct elf_info *elf, Elf_Sym *sym)
> +{
> +       const char *name = elf->strtab + sym->st_name;
> +
> +       if (!name || !strlen(name))
> +               return 0;
> +       return !is_mapping_symbol(name);
> +}
> +
> +/* symsearch.c */
> +void symsearch_init(struct elf_info *elf);
> +void symsearch_finish(struct elf_info *elf);
> +Elf_Sym *symsearch_find_nearest(struct elf_info *elf, Elf_Addr addr,
> +                               unsigned int secndx, bool allow_negative,
> +                               Elf_Addr min_distance);
> +
>  /* file2alias.c */
>  void handle_moddevtable(struct module *mod, struct elf_info *info,
>                         Elf_Sym *sym, const char *symname);
> diff --git a/scripts/mod/symsearch.c b/scripts/mod/symsearch.c
> new file mode 100644
> index 000000000000..aab79262512b
> --- /dev/null
> +++ b/scripts/mod/symsearch.c
> @@ -0,0 +1,233 @@
> +// SPDX-License-Identifier: GPL-2.0
> +
> +/* Helper functions for finding the symbol in an ELF which is "nearest"
> + * to a given address.
> + */
> +
> +#include "modpost.h"
> +
> +/* Struct used for binary search. */
> +struct syminfo {
> +       unsigned int symbol_index;
> +       unsigned int section_index;
> +       Elf_Addr addr;
> +};
> +
> +/* Container used to hold an entire binary search table.
> + * Entries in table are ascending, sorted first by section_index,
> + * then by addr, and last by symbol_index.  The sorting by
> + * symbol_index is used to duplicate the quirks of the prior
> + * find_nearest_sym() function, where exact matches to an address
> + * return the first symtab entry seen, but near misses return the
> + * last symtab entry seen.
> + * The first and last entries of the table are sentinels and their
> + * values only matter in two places:  when we sort the table, and
> + * on lookups, the end sentinel should not have an addr field which
> + * matches its immediate predecessor.  To meet these requirements,
> + * we initialize them to (0,0,0) and (max,max,max), and then after
> + * sorting, we tweak the end sentinel's addr field accordingly.
> + */
> +struct symsearch {
> +       size_t table_size;
> +       struct syminfo table[];
> +};
> +
> +static inline bool is_sym_searchable(struct elf_info *elf, Elf_Sym *sym)
> +{
> +       return is_valid_name(elf, sym) != 0;
> +}
> +
> +static int syminfo_compare(const void *s1, const void *s2)
> +{
> +       const struct syminfo *sym1 = s1;
> +       const struct syminfo *sym2 = s2;
> +
> +       if (sym1->section_index > sym2->section_index)
> +               return 1;
> +       if (sym1->section_index < sym2->section_index)
> +               return -1;
> +       if (sym1->addr > sym2->addr)
> +               return 1;
> +       if (sym1->addr < sym2->addr)
> +               return -1;
> +       if (sym1->symbol_index > sym2->symbol_index)
> +               return 1;
> +       if (sym1->symbol_index < sym2->symbol_index)
> +               return -1;
> +       return 0;
> +}
> +
> +static size_t symbol_count(struct elf_info *elf)
> +{
> +       size_t result = 0;
> +
> +       for (Elf_Sym *sym = elf->symtab_start; sym < elf->symtab_stop; sym++) {
> +               if (is_sym_searchable(elf, sym))
> +                       result++;
> +       }
> +       return result;
> +}
> +
> +/* Populate the search array that we just allocated.
> + * Be slightly paranoid here.  If the ELF file changes during processing,
> + * or if the behavior of is_sym_searchable() changes during processing,
> + * we want to catch it; neither of those is acceptable.
> + */
> +static void symsearch_populate(struct elf_info *elf,
> +                              struct syminfo *table,
> +                              size_t table_size)
> +{
> +       bool is_arm = (elf->hdr->e_machine == EM_ARM);
> +
> +       /* Start sentinel */
> +       if (table_size-- == 0)
> +               fatal("%s: size mismatch\n", __func__);
> +       table->symbol_index = 0;
> +       table->section_index = 0;
> +       table->addr = 0;
> +       table++;
> +
> +       for (Elf_Sym *sym = elf->symtab_start; sym < elf->symtab_stop; sym++) {
> +               if (is_sym_searchable(elf, sym)) {
> +                       if (table_size-- == 0)
> +                               fatal("%s: size mismatch\n", __func__);
> +                       table->symbol_index = sym - elf->symtab_start;
> +                       table->section_index = get_secindex(elf, sym);
> +                       table->addr = sym->st_value;
> +
> +                       /*
> +                        * For ARM Thumb instruction, the bit 0 of st_value is
> +                        * set if the symbol is STT_FUNC type. Mask it to get
> +                        * the address.
> +                        */
> +                       if (is_arm && ELF_ST_TYPE(sym->st_info) == STT_FUNC)
> +                               table->addr &= ~1;
> +
> +                       table++;
> +               }
> +       }
> +
> +       /* End sentinel; all values are unsigned so -1 wraps to max */
> +       if (table_size != 1)
> +               fatal("%s: size mismatch\n", __func__);
> +       table->symbol_index = -1;
> +       table->section_index = -1;
> +       table->addr = -1;
> +}
> +
> +void symsearch_init(struct elf_info *elf)
> +{
> +       /* +2 here to allocate space for the start and end sentinels */
> +       size_t table_size = symbol_count(elf) + 2;
> +
> +       elf->symsearch = NOFAIL(malloc(
> +                                       sizeof(struct symsearch) +
> +                                       sizeof(struct syminfo) * table_size));
> +       elf->symsearch->table_size = table_size;
> +
> +       symsearch_populate(elf, elf->symsearch->table, table_size);
> +       qsort(elf->symsearch->table, table_size,
> +             sizeof(struct syminfo), syminfo_compare);
> +
> +       /* A bit of paranoia; make sure that the end sentinel's address is
> +        * different than its predecessor.  Not doing this could cause
> +        * possible undefined behavior if anybody ever inserts a symbol
> +        * with section_index and addr both at their max values.
> +        * Doing this little bit of defensive programming is more efficient
> +        * than checking for array overruns later.
> +        */
> +       elf->symsearch->table[table_size - 1].addr =
> +               elf->symsearch->table[table_size - 2].addr + 1;
> +}
> +
> +void symsearch_finish(struct elf_info *elf)
> +{
> +       free(elf->symsearch);
> +       elf->symsearch = NULL;
> +}
> +
> +/* Find the syminfo which is in secndx and "nearest" to addr.
> + * allow_negative: allow returning a symbol whose address is > addr.
> + * min_distance: ignore symbols which are further away than this.
> + *
> + * Returns a nonzero index into the symsearch table for success.
> + * Returns NULL if no legal symbol is found within the requested range.
> + */
> +static size_t symsearch_find_impl(struct elf_info *elf, Elf_Addr addr,
> +                                 unsigned int secndx, bool allow_negative,
> +                                 Elf_Addr min_distance)
> +{
> +       /* Find the target in the array; it will lie between two elements.
> +        * Invariant here: table[lo] < target <= table[hi]
> +        * For the purposes of search, exact hits in the search array are
> +        * considered greater than the target.  This means that if we do
> +        * get an exact hit, then once the search terminates, table[hi]
> +        * will be the exact match which has the lowest symbol index.
> +        */
> +       struct syminfo *table = elf->symsearch->table;
> +       size_t hi = elf->symsearch->table_size - 1;
> +       size_t lo = 0;
> +       bool hi_is_usable = false;
> +       bool lo_is_usable = false;
> +       Elf_Addr hi_distance = -1;  // max Elf_Addr
> +       Elf_Addr lo_distance = -1;  // max Elf_Addr
> +       Elf_Addr min_distance_lo = min_distance;
> +       Elf_Addr min_distance_hi = allow_negative ? min_distance : 0;
> +
> +       for (;;) {
> +               size_t mid;
> +
> +               mid = lo + (hi - lo) / 2;
> +               if (mid == lo)
> +                       break;
> +               if (secndx > table[mid].section_index) {
> +                       lo = mid;
> +               } else if (secndx < table[mid].section_index) {
> +                       hi = mid;
> +               } else if (addr > table[mid].addr) {
> +                       lo = mid;
> +                       lo_distance = addr - table[mid].addr;
> +                       lo_is_usable = (lo_distance <= min_distance_lo);
> +               } else {
> +                       hi = mid;
> +                       hi_distance = table[mid].addr - addr;
> +                       hi_is_usable = (hi_distance <= min_distance_hi);
> +               }
> +       }
> +
> +       if (hi_is_usable && lo_is_usable) {
> +               lo_is_usable = (lo_distance <= hi_distance);
> +               hi_is_usable = (hi_distance <= lo_distance);
> +       }
> +
> +       if (!hi_is_usable)
> +               return lo_is_usable ? lo : 0;
> +
> +       if (hi_distance == 0)
> +               return hi;
> +
> +       /* Match quirks of existing behavior.  Advance hi to the last
> +        * matching entry in the search table.  We don't need to worry
> +        * about running off the end of the array due to the sentinel.
> +        */
> +       while (table[hi+1].addr == table[hi].addr &&
> +              table[hi+1].section_index == table[hi].section_index) {
> +               hi++;
> +       }
> +
> +       return (lo_is_usable &&
> +               table[lo].symbol_index > table[hi].symbol_index) ? lo : hi;
> +}
> +
> +Elf_Sym *symsearch_find_nearest(struct elf_info *elf, Elf_Addr addr,
> +                               unsigned int secndx, bool allow_negative,
> +                               Elf_Addr min_distance)
> +{
> +       size_t result = symsearch_find_impl(elf, addr, secndx,
> +                                           allow_negative, min_distance);
> +
> +       if (result == 0)
> +               return NULL;
> +
> +       return &elf->symtab_start[elf->symsearch->table[result].symbol_index];
> +}
> --
> 2.42.0.459.ge4e396fd5e-goog
>
Clément Léger Sept. 21, 2023, 12:07 p.m. UTC | #2
Hi Jack,

On RISC-V builds, we noticed a recent slow down after commit
ddb5cdbafaaa ("kbuild: generate KSYMTAB entries by modpost") was
introduced. We tracked it down to find_nearest_sym() being called a lot
and more specifically since we have a lot of local symbols that are
generated as part of PCREL accesses (even more when building in debug
mode, measured a count of 12964362 symbols in one vmlinux.o).

Without your changes, a typical riscv defconfig build + debug, modpost
took the following amount of time:

$ time scripts/mod/modpost -M -o Module.symvers -T modules.order vmlinux.o
real    4m21,976s
user    4m21,803s
sys     0m0,100s

With your changes:

$ time scripts/mod/modpost -M -o Module.symvers -T modules.order vmlinux.o
real    0m1,077s
user    0m0,980s
sys     0m0,095s

I guess you could further optimize it by allocating a binary tree for
each section since find_nearest_sym() searches for symbols in a specific
section, that would save a few comparisons. Not sure it will be way
faster nor simpler to implement though.

FWIW: Tested-by: Clément Léger <cleger@rivosinc.com>

Thanks,

Clément

On 18/09/2023 23:06, Jack Brennen wrote:
> Modify modpost to use binary search for converting addresses back
> into symbol references.  Previously it used linear search.
> 
> This change saves a few seconds of wall time for defconfig builds,
> but can save several minutes on allyesconfigs.
> 
> Before:
> $ make LLVM=1 -j128 allyesconfig vmlinux -s KCFLAGS="-Wno-error"
>         Elapsed (wall clock) time (h:mm:ss or m:ss): 13:30.31
> 
> After:
> $ make LLVM=1 -j128 allyesconfig vmlinux -s KCFLAGS="-Wno-error"
>         Elapsed (wall clock) time (h:mm:ss or m:ss): 11:43.43
> 
> Signed-off-by: Jack Brennen <jbrennen@google.com>
> Tested-by: Nick Desaulniers <ndesaulniers@google.com>
> ---
>  scripts/mod/Makefile    |   4 +-
>  scripts/mod/modpost.c   |  60 +----------
>  scripts/mod/modpost.h   |  25 +++++
>  scripts/mod/symsearch.c | 233 ++++++++++++++++++++++++++++++++++++++++
>  4 files changed, 265 insertions(+), 57 deletions(-)
>  create mode 100644 scripts/mod/symsearch.c
> 
> diff --git a/scripts/mod/Makefile b/scripts/mod/Makefile
> index c9e38ad937fd..3c54125eb373 100644
> --- a/scripts/mod/Makefile
> +++ b/scripts/mod/Makefile
> @@ -5,7 +5,7 @@ CFLAGS_REMOVE_empty.o += $(CC_FLAGS_LTO)
>  hostprogs-always-y	+= modpost mk_elfconfig
>  always-y		+= empty.o
>  
> -modpost-objs	:= modpost.o file2alias.o sumversion.o
> +modpost-objs	:= modpost.o file2alias.o sumversion.o symsearch.o
>  
>  devicetable-offsets-file := devicetable-offsets.h
>  
> @@ -16,7 +16,7 @@ targets += $(devicetable-offsets-file) devicetable-offsets.s
>  
>  # dependencies on generated files need to be listed explicitly
>  
> -$(obj)/modpost.o $(obj)/file2alias.o $(obj)/sumversion.o: $(obj)/elfconfig.h
> +$(obj)/modpost.o $(obj)/file2alias.o $(obj)/sumversion.o $(obj)/symsearch.o: $(obj)/elfconfig.h
>  $(obj)/file2alias.o: $(obj)/$(devicetable-offsets-file)
>  
>  quiet_cmd_elfconfig = MKELF   $@
> diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
> index de499dce5265..975f235aca2c 100644
> --- a/scripts/mod/modpost.c
> +++ b/scripts/mod/modpost.c
> @@ -22,7 +22,6 @@
>  #include <errno.h>
>  #include "modpost.h"
>  #include "../../include/linux/license.h"
> -#include "../../include/linux/module_symbol.h"
>  
>  static bool module_enabled;
>  /* Are we using CONFIG_MODVERSIONS? */
> @@ -577,11 +576,14 @@ static int parse_elf(struct elf_info *info, const char *filename)
>  			*p = TO_NATIVE(*p);
>  	}
>  
> +	symsearch_init(info);
> +
>  	return 1;
>  }
>  
>  static void parse_elf_finish(struct elf_info *info)
>  {
> +	symsearch_finish(info);
>  	release_file(info->hdr, info->size);
>  }
>  
> @@ -1039,65 +1041,13 @@ static int secref_whitelist(const char *fromsec, const char *fromsym,
>  	return 1;
>  }
>  
> -/*
> - * If there's no name there, ignore it; likewise, ignore it if it's
> - * one of the magic symbols emitted used by current tools.
> - *
> - * Otherwise if find_symbols_between() returns those symbols, they'll
> - * fail the whitelist tests and cause lots of false alarms ... fixable
> - * only by merging __exit and __init sections into __text, bloating
> - * the kernel (which is especially evil on embedded platforms).
> - */
> -static inline int is_valid_name(struct elf_info *elf, Elf_Sym *sym)
> -{
> -	const char *name = elf->strtab + sym->st_name;
> -
> -	if (!name || !strlen(name))
> -		return 0;
> -	return !is_mapping_symbol(name);
> -}
> -
>  /* Look up the nearest symbol based on the section and the address */
>  static Elf_Sym *find_nearest_sym(struct elf_info *elf, Elf_Addr addr,
>  				 unsigned int secndx, bool allow_negative,
>  				 Elf_Addr min_distance)
>  {
> -	Elf_Sym *sym;
> -	Elf_Sym *near = NULL;
> -	Elf_Addr sym_addr, distance;
> -	bool is_arm = (elf->hdr->e_machine == EM_ARM);
> -
> -	for (sym = elf->symtab_start; sym < elf->symtab_stop; sym++) {
> -		if (get_secindex(elf, sym) != secndx)
> -			continue;
> -		if (!is_valid_name(elf, sym))
> -			continue;
> -
> -		sym_addr = sym->st_value;
> -
> -		/*
> -		 * For ARM Thumb instruction, the bit 0 of st_value is set
> -		 * if the symbol is STT_FUNC type. Mask it to get the address.
> -		 */
> -		if (is_arm && ELF_ST_TYPE(sym->st_info) == STT_FUNC)
> -			 sym_addr &= ~1;
> -
> -		if (addr >= sym_addr)
> -			distance = addr - sym_addr;
> -		else if (allow_negative)
> -			distance = sym_addr - addr;
> -		else
> -			continue;
> -
> -		if (distance <= min_distance) {
> -			min_distance = distance;
> -			near = sym;
> -		}
> -
> -		if (min_distance == 0)
> -			break;
> -	}
> -	return near;
> +	return symsearch_find_nearest(elf, addr, secndx,
> +				      allow_negative, min_distance);
>  }
>  
>  static Elf_Sym *find_fromsym(struct elf_info *elf, Elf_Addr addr,
> diff --git a/scripts/mod/modpost.h b/scripts/mod/modpost.h
> index 5f94c2c9f2d9..6413f26fcb6b 100644
> --- a/scripts/mod/modpost.h
> +++ b/scripts/mod/modpost.h
> @@ -10,6 +10,7 @@
>  #include <fcntl.h>
>  #include <unistd.h>
>  #include <elf.h>
> +#include "../../include/linux/module_symbol.h"
>  
>  #include "list.h"
>  #include "elfconfig.h"
> @@ -128,6 +129,8 @@ struct elf_info {
>  	 * take shndx from symtab_shndx_start[N] instead */
>  	Elf32_Word   *symtab_shndx_start;
>  	Elf32_Word   *symtab_shndx_stop;
> +
> +	struct symsearch *symsearch;
>  };
>  
>  /* Accessor for sym->st_shndx, hides ugliness of "64k sections" */
> @@ -154,6 +157,28 @@ static inline unsigned int get_secindex(const struct elf_info *info,
>  	return index;
>  }
>  
> +/*
> + * If there's no name there, ignore it; likewise, ignore it if it's
> + * one of the magic symbols emitted used by current tools.
> + *
> + * Internal symbols created by tools should be ignored by modpost.
> + */
> +static inline int is_valid_name(struct elf_info *elf, Elf_Sym *sym)
> +{
> +	const char *name = elf->strtab + sym->st_name;
> +
> +	if (!name || !strlen(name))
> +		return 0;
> +	return !is_mapping_symbol(name);
> +}
> +
> +/* symsearch.c */
> +void symsearch_init(struct elf_info *elf);
> +void symsearch_finish(struct elf_info *elf);
> +Elf_Sym *symsearch_find_nearest(struct elf_info *elf, Elf_Addr addr,
> +				unsigned int secndx, bool allow_negative,
> +				Elf_Addr min_distance);
> +
>  /* file2alias.c */
>  void handle_moddevtable(struct module *mod, struct elf_info *info,
>  			Elf_Sym *sym, const char *symname);
> diff --git a/scripts/mod/symsearch.c b/scripts/mod/symsearch.c
> new file mode 100644
> index 000000000000..aab79262512b
> --- /dev/null
> +++ b/scripts/mod/symsearch.c
> @@ -0,0 +1,233 @@
> +// SPDX-License-Identifier: GPL-2.0
> +
> +/* Helper functions for finding the symbol in an ELF which is "nearest"
> + * to a given address.
> + */
> +
> +#include "modpost.h"
> +
> +/* Struct used for binary search. */
> +struct syminfo {
> +	unsigned int symbol_index;
> +	unsigned int section_index;
> +	Elf_Addr addr;
> +};
> +
> +/* Container used to hold an entire binary search table.
> + * Entries in table are ascending, sorted first by section_index,
> + * then by addr, and last by symbol_index.  The sorting by
> + * symbol_index is used to duplicate the quirks of the prior
> + * find_nearest_sym() function, where exact matches to an address
> + * return the first symtab entry seen, but near misses return the
> + * last symtab entry seen.
> + * The first and last entries of the table are sentinels and their
> + * values only matter in two places:  when we sort the table, and
> + * on lookups, the end sentinel should not have an addr field which
> + * matches its immediate predecessor.  To meet these requirements,
> + * we initialize them to (0,0,0) and (max,max,max), and then after
> + * sorting, we tweak the end sentinel's addr field accordingly.
> + */
> +struct symsearch {
> +	size_t table_size;
> +	struct syminfo table[];
> +};
> +
> +static inline bool is_sym_searchable(struct elf_info *elf, Elf_Sym *sym)
> +{
> +	return is_valid_name(elf, sym) != 0;
> +}
> +
> +static int syminfo_compare(const void *s1, const void *s2)
> +{
> +	const struct syminfo *sym1 = s1;
> +	const struct syminfo *sym2 = s2;
> +
> +	if (sym1->section_index > sym2->section_index)
> +		return 1;
> +	if (sym1->section_index < sym2->section_index)
> +		return -1;
> +	if (sym1->addr > sym2->addr)
> +		return 1;
> +	if (sym1->addr < sym2->addr)
> +		return -1;
> +	if (sym1->symbol_index > sym2->symbol_index)
> +		return 1;
> +	if (sym1->symbol_index < sym2->symbol_index)
> +		return -1;
> +	return 0;
> +}
> +
> +static size_t symbol_count(struct elf_info *elf)
> +{
> +	size_t result = 0;
> +
> +	for (Elf_Sym *sym = elf->symtab_start; sym < elf->symtab_stop; sym++) {
> +		if (is_sym_searchable(elf, sym))
> +			result++;
> +	}
> +	return result;
> +}
> +
> +/* Populate the search array that we just allocated.
> + * Be slightly paranoid here.  If the ELF file changes during processing,
> + * or if the behavior of is_sym_searchable() changes during processing,
> + * we want to catch it; neither of those is acceptable.
> + */
> +static void symsearch_populate(struct elf_info *elf,
> +			       struct syminfo *table,
> +			       size_t table_size)
> +{
> +	bool is_arm = (elf->hdr->e_machine == EM_ARM);
> +
> +	/* Start sentinel */
> +	if (table_size-- == 0)
> +		fatal("%s: size mismatch\n", __func__);
> +	table->symbol_index = 0;
> +	table->section_index = 0;
> +	table->addr = 0;
> +	table++;
> +
> +	for (Elf_Sym *sym = elf->symtab_start; sym < elf->symtab_stop; sym++) {
> +		if (is_sym_searchable(elf, sym)) {
> +			if (table_size-- == 0)
> +				fatal("%s: size mismatch\n", __func__);
> +			table->symbol_index = sym - elf->symtab_start;
> +			table->section_index = get_secindex(elf, sym);
> +			table->addr = sym->st_value;
> +
> +			/*
> +			 * For ARM Thumb instruction, the bit 0 of st_value is
> +			 * set if the symbol is STT_FUNC type. Mask it to get
> +			 * the address.
> +			 */
> +			if (is_arm && ELF_ST_TYPE(sym->st_info) == STT_FUNC)
> +				table->addr &= ~1;
> +
> +			table++;
> +		}
> +	}
> +
> +	/* End sentinel; all values are unsigned so -1 wraps to max */
> +	if (table_size != 1)
> +		fatal("%s: size mismatch\n", __func__);
> +	table->symbol_index = -1;
> +	table->section_index = -1;
> +	table->addr = -1;
> +}
> +
> +void symsearch_init(struct elf_info *elf)
> +{
> +	/* +2 here to allocate space for the start and end sentinels */
> +	size_t table_size = symbol_count(elf) + 2;
> +
> +	elf->symsearch = NOFAIL(malloc(
> +					sizeof(struct symsearch) +
> +					sizeof(struct syminfo) * table_size));
> +	elf->symsearch->table_size = table_size;
> +
> +	symsearch_populate(elf, elf->symsearch->table, table_size);
> +	qsort(elf->symsearch->table, table_size,
> +	      sizeof(struct syminfo), syminfo_compare);
> +
> +	/* A bit of paranoia; make sure that the end sentinel's address is
> +	 * different than its predecessor.  Not doing this could cause
> +	 * possible undefined behavior if anybody ever inserts a symbol
> +	 * with section_index and addr both at their max values.
> +	 * Doing this little bit of defensive programming is more efficient
> +	 * than checking for array overruns later.
> +	 */
> +	elf->symsearch->table[table_size - 1].addr =
> +		elf->symsearch->table[table_size - 2].addr + 1;
> +}
> +
> +void symsearch_finish(struct elf_info *elf)
> +{
> +	free(elf->symsearch);
> +	elf->symsearch = NULL;
> +}
> +
> +/* Find the syminfo which is in secndx and "nearest" to addr.
> + * allow_negative: allow returning a symbol whose address is > addr.
> + * min_distance: ignore symbols which are further away than this.
> + *
> + * Returns a nonzero index into the symsearch table for success.
> + * Returns NULL if no legal symbol is found within the requested range.
> + */
> +static size_t symsearch_find_impl(struct elf_info *elf, Elf_Addr addr,
> +				  unsigned int secndx, bool allow_negative,
> +				  Elf_Addr min_distance)
> +{
> +	/* Find the target in the array; it will lie between two elements.
> +	 * Invariant here: table[lo] < target <= table[hi]
> +	 * For the purposes of search, exact hits in the search array are
> +	 * considered greater than the target.	This means that if we do
> +	 * get an exact hit, then once the search terminates, table[hi]
> +	 * will be the exact match which has the lowest symbol index.
> +	 */
> +	struct syminfo *table = elf->symsearch->table;
> +	size_t hi = elf->symsearch->table_size - 1;
> +	size_t lo = 0;
> +	bool hi_is_usable = false;
> +	bool lo_is_usable = false;
> +	Elf_Addr hi_distance = -1;  // max Elf_Addr
> +	Elf_Addr lo_distance = -1;  // max Elf_Addr
> +	Elf_Addr min_distance_lo = min_distance;
> +	Elf_Addr min_distance_hi = allow_negative ? min_distance : 0;
> +
> +	for (;;) {
> +		size_t mid;
> +
> +		mid = lo + (hi - lo) / 2;
> +		if (mid == lo)
> +			break;
> +		if (secndx > table[mid].section_index) {
> +			lo = mid;
> +		} else if (secndx < table[mid].section_index) {
> +			hi = mid;
> +		} else if (addr > table[mid].addr) {
> +			lo = mid;
> +			lo_distance = addr - table[mid].addr;
> +			lo_is_usable = (lo_distance <= min_distance_lo);
> +		} else {
> +			hi = mid;
> +			hi_distance = table[mid].addr - addr;
> +			hi_is_usable = (hi_distance <= min_distance_hi);
> +		}
> +	}
> +
> +	if (hi_is_usable && lo_is_usable) {
> +		lo_is_usable = (lo_distance <= hi_distance);
> +		hi_is_usable = (hi_distance <= lo_distance);
> +	}
> +
> +	if (!hi_is_usable)
> +		return lo_is_usable ? lo : 0;
> +
> +	if (hi_distance == 0)
> +		return hi;
> +
> +	/* Match quirks of existing behavior.  Advance hi to the last
> +	 * matching entry in the search table.	We don't need to worry
> +	 * about running off the end of the array due to the sentinel.
> +	 */
> +	while (table[hi+1].addr == table[hi].addr &&
> +	       table[hi+1].section_index == table[hi].section_index) {
> +		hi++;
> +	}
> +
> +	return (lo_is_usable &&
> +		table[lo].symbol_index > table[hi].symbol_index) ? lo : hi;
> +}
> +
> +Elf_Sym *symsearch_find_nearest(struct elf_info *elf, Elf_Addr addr,
> +				unsigned int secndx, bool allow_negative,
> +				Elf_Addr min_distance)
> +{
> +	size_t result = symsearch_find_impl(elf, addr, secndx,
> +					    allow_negative, min_distance);
> +
> +	if (result == 0)
> +		return NULL;
> +
> +	return &elf->symtab_start[elf->symsearch->table[result].symbol_index];
> +}
Masahiro Yamada Sept. 23, 2023, 8:49 a.m. UTC | #3
On Tue, Sep 19, 2023 at 6:06 AM Jack Brennen <jbrennen@google.com> wrote:
>
> Modify modpost to use binary search for converting addresses back
> into symbol references.  Previously it used linear search.
>
> This change saves a few seconds of wall time for defconfig builds,
> but can save several minutes on allyesconfigs.

Thanks.
Binary search is a good idea.


> Before:
> $ make LLVM=1 -j128 allyesconfig vmlinux -s KCFLAGS="-Wno-error"
>         Elapsed (wall clock) time (h:mm:ss or m:ss): 13:30.31

Instead of the time for the entire build,
can you put the time for the modpost command?

If you allyesconfig case,

 $ time scripts/mod/modpost -M -m -a -N -o vmlinux.symvers vmlinux.o





> diff --git a/scripts/mod/symsearch.c b/scripts/mod/symsearch.c
> new file mode 100644
> index 000000000000..aab79262512b
> --- /dev/null
> +++ b/scripts/mod/symsearch.c
> @@ -0,0 +1,233 @@
> +// SPDX-License-Identifier: GPL-2.0
> +
> +/* Helper functions for finding the symbol in an ELF which is "nearest"
> + * to a given address.
> + */
>

Can you use the following block comment style?

/*
 * Helper functions for finding the symbol in an ELF which is "nearest"
 * to a given address.
 */



> +#include "modpost.h"
> +
> +/* Struct used for binary search. */

I think this obvious comment is unneeded.



> +struct syminfo {
> +       unsigned int symbol_index;
> +       unsigned int section_index;
> +       Elf_Addr addr;
> +};
> +
> +/* Container used to hold an entire binary search table.
> + * Entries in table are ascending, sorted first by section_index,
> + * then by addr, and last by symbol_index.  The sorting by
> + * symbol_index is used to duplicate the quirks of the prior
> + * find_nearest_sym() function, where exact matches to an address
> + * return the first symtab entry seen, but near misses return the
> + * last symtab entry seen.

Preserving this quirk makes the code complicated.

I do not mind changing the behavior of the corner case.





> + * The first and last entries of the table are sentinels and their
> + * values only matter in two places:  when we sort the table, and
> + * on lookups, the end sentinel should not have an addr field which
> + * matches its immediate predecessor.  To meet these requirements,
> + * we initialize them to (0,0,0) and (max,max,max), and then after
> + * sorting, we tweak the end sentinel's addr field accordingly.
> + */
> +struct symsearch {
> +       size_t table_size;
> +       struct syminfo table[];
> +};



syminfo::symbol_index is unsigned int.
symsearch::table_size is size_t.


symbol_index of the last element is always larger than
elf->symsearch->table_size.

So, the code works only within 32-bit width anyway.












> +
> +static inline bool is_sym_searchable(struct elf_info *elf, Elf_Sym *sym)
> +{
> +       return is_valid_name(elf, sym) != 0;
> +}

If you call is_valid_name() directly, this function was unneeded?






> +
> +static int syminfo_compare(const void *s1, const void *s2)
> +{
> +       const struct syminfo *sym1 = s1;
> +       const struct syminfo *sym2 = s2;
> +
> +       if (sym1->section_index > sym2->section_index)
> +               return 1;
> +       if (sym1->section_index < sym2->section_index)
> +               return -1;
> +       if (sym1->addr > sym2->addr)
> +               return 1;
> +       if (sym1->addr < sym2->addr)
> +               return -1;
> +       if (sym1->symbol_index > sym2->symbol_index)
> +               return 1;
> +       if (sym1->symbol_index < sym2->symbol_index)
> +               return -1;
> +       return 0;
> +}
> +
> +static size_t symbol_count(struct elf_info *elf)
> +{
> +       size_t result = 0;
> +
> +       for (Elf_Sym *sym = elf->symtab_start; sym < elf->symtab_stop; sym++) {
> +               if (is_sym_searchable(elf, sym))
> +                       result++;
> +       }
> +       return result;
> +}
> +
> +/* Populate the search array that we just allocated.
> + * Be slightly paranoid here.  If the ELF file changes during processing,

I could not understand. In which case, the ELF file changes?

modpost loads the entire file to memory first..

In which scenario, the memory content changes?






> + * or if the behavior of is_sym_searchable() changes during processing,
> + * we want to catch it; neither of those is acceptable.
> + */
> +static void symsearch_populate(struct elf_info *elf,
> +                              struct syminfo *table,
> +                              size_t table_size)
> +{
> +       bool is_arm = (elf->hdr->e_machine == EM_ARM);
> +
> +       /* Start sentinel */
> +       if (table_size-- == 0)
> +               fatal("%s: size mismatch\n", __func__);
> +       table->symbol_index = 0;
> +       table->section_index = 0;
> +       table->addr = 0;
> +       table++;
> +
> +       for (Elf_Sym *sym = elf->symtab_start; sym < elf->symtab_stop; sym++) {
> +               if (is_sym_searchable(elf, sym)) {
> +                       if (table_size-- == 0)
> +                               fatal("%s: size mismatch\n", __func__);
> +                       table->symbol_index = sym - elf->symtab_start;
> +                       table->section_index = get_secindex(elf, sym);
> +                       table->addr = sym->st_value;
> +
> +                       /*
> +                        * For ARM Thumb instruction, the bit 0 of st_value is
> +                        * set if the symbol is STT_FUNC type. Mask it to get
> +                        * the address.
> +                        */
> +                       if (is_arm && ELF_ST_TYPE(sym->st_info) == STT_FUNC)
> +                               table->addr &= ~1;
> +
> +                       table++;
> +               }
> +       }
> +
> +       /* End sentinel; all values are unsigned so -1 wraps to max */
> +       if (table_size != 1)
> +               fatal("%s: size mismatch\n", __func__);
> +       table->symbol_index = -1;
> +       table->section_index = -1;
> +       table->addr = -1;
> +}
> +
> +void symsearch_init(struct elf_info *elf)
> +{
> +       /* +2 here to allocate space for the start and end sentinels */
> +       size_t table_size = symbol_count(elf) + 2;
> +
> +       elf->symsearch = NOFAIL(malloc(
> +                                       sizeof(struct symsearch) +
> +                                       sizeof(struct syminfo) * table_size));
> +       elf->symsearch->table_size = table_size;
> +
> +       symsearch_populate(elf, elf->symsearch->table, table_size);
> +       qsort(elf->symsearch->table, table_size,
> +             sizeof(struct syminfo), syminfo_compare);
> +
> +       /* A bit of paranoia; make sure that the end sentinel's address is
> +        * different than its predecessor.  Not doing this could cause
> +        * possible undefined behavior if anybody ever inserts a symbol
> +        * with section_index and addr both at their max values.

I could not understand this comment.

If section_index and addr both at their max values at [table_size - 2],
->table[table_size - 2].addr + 1 wraps to zero.

The table is not sorted any longer?




> +        * Doing this little bit of defensive programming is more efficient
> +        * than checking for array overruns later.
> +        */
> +       elf->symsearch->table[table_size - 1].addr =
> +               elf->symsearch->table[table_size - 2].addr + 1;
> +}
> +
> +void symsearch_finish(struct elf_info *elf)
> +{
> +       free(elf->symsearch);
> +       elf->symsearch = NULL;
> +}
> +
> +/* Find the syminfo which is in secndx and "nearest" to addr.
> + * allow_negative: allow returning a symbol whose address is > addr.
> + * min_distance: ignore symbols which are further away than this.
> + *
> + * Returns a nonzero index into the symsearch table for success.
> + * Returns NULL if no legal symbol is found within the requested range.
> + */
> +static size_t symsearch_find_impl(struct elf_info *elf, Elf_Addr addr,
> +                                 unsigned int secndx, bool allow_negative,
> +                                 Elf_Addr min_distance)
> +{
> +       /* Find the target in the array; it will lie between two elements.
> +        * Invariant here: table[lo] < target <= table[hi]
> +        * For the purposes of search, exact hits in the search array are
> +        * considered greater than the target.  This means that if we do
> +        * get an exact hit, then once the search terminates, table[hi]
> +        * will be the exact match which has the lowest symbol index.
> +        */
> +       struct syminfo *table = elf->symsearch->table;
> +       size_t hi = elf->symsearch->table_size - 1;
> +       size_t lo = 0;




The binary search code was implemented in a too complex way
to preserve the previous quirks.


I want to use the same comparison function for
qsort() and bsearch() to avoid paranoia.




How about this implementation?



static struct syminfo *symsearch_find_impl(struct elf_info *elf, Elf_Addr addr,
                                           unsigned int secndx, bool
allow_negative,
                                           Elf_Addr min_distance)
{
        struct syminfo target = { .symbol_index = -1, .section_index =
secndx, .addr = addr };
        struct syminfo *table = elf->symsearch->table;
        unsigned int hi = elf->symsearch->table_size - 1;
        unsigned int lo = 0;
        struct syminfo *result = NULL;
        Elf_Addr distance;

        while (lo < hi) {
                unsigned int mid = (lo + hi + 1) / 2;

                if (syminfo_compare(&table[mid], &target) > 0)
                        hi = mid - 1;
                else
                        lo = mid;
        }

        /*
         * The target resides between lo and (lo + 1).
         * If allow_negative is true, check both of them.
         */

        if (allow_negative && lo + 1 < elf->symsearch->table_size &&
            table[lo + 1].section_index == secndx) {
                distance = table[lo + 1].addr - addr;
                if (distance <= min_distance) {
                        min_distance = distance;
                        result = &table[lo + 1];
                }
        }

        if (table[lo].section_index == secndx) {
                distance = addr - table[lo].addr;
                if (distance <= min_distance)
                  result = &table[lo];
        }

        return result;
}

Elf_Sym *symsearch_find_nearest(struct elf_info *elf, Elf_Addr addr,
                                unsigned int secndx, bool allow_negative,
                                Elf_Addr min_distance)
{
        struct syminfo *result;

        result = symsearch_find_impl(elf, addr, secndx,
                                     allow_negative, min_distance);
        if (!result)
                return NULL;

        return &elf->symtab_start[result->symbol_index];
}



This does not preserve the previous quirks.

If there are multiple entries with the same address,
it always returns the last element.

I did not expect sentinels.

I did not do thorough tests, but it seems to be working for me.




Also, please call symsearch_find_nearest() directly
and remove symfind_nearest_sym().






--
Best Regards

Masahiro Yamada
Jack Brennen Sept. 23, 2023, 5:21 p.m. UTC | #4
Mainly just clarifying that we're willing to change the behavior in corner
cases? The existing behavior has one set of quirks about which symtab entry
is returned, and your proposed behavior has another different set of quirks.

It's probably OK to break builds which have an undocumented assumption
about the order of symtab entries; personally, I'd rather not risk that myself,
but if somebody with more experience is willing to back that decision, I'm
OK with it.

Also, there's an alternative approach that uses bsearch from the standard
library and a common comparison function between qsort and bsearch. I
considered this alternative earlier; maybe you would prefer it because it
eliminates having to reimplement a binary search algorithm.
I chose not to do it this way because of trying to duplicate the quirks.
If no duplication of the quirks is needed, this becomes easier.

The idea for that is to build a sorted array of syminfo that look like this:

(section_index, addr_lo, addr_hi, sym_lo, sym_hi)

What this represents is the situation where for any lookup in the
range from (section_index, addr_lo) to (section_index, addr_hi)
inclusive, the nearest symbol will be either sym_lo or sym_hi.
There are four different meanings for (sym_lo, sym_hi):

(sym_lo = 0)
This is a placeholder for a duplicated address, and it cannot
compare equal to anything. After we sort the array, we set all
of the duplicated addresses except for the last one to sym_lo = 0.

(sym_lo = MAX_SYM)
This is used to designate an address being looked up. When this
is seen, it compares equal to any other syminfo that has an
overlapping range.

(sym_lo != 0, sym_hi = 0)
This represents the last range in a section. There's no following
address that could match. Should also have addr_hi = MAX.

(sym_lo != 0, sym_hi != 0)
This represents a range in a section that's not the last range.
sym_hi may be usable to satisfy the lookup, but only if it's
closer than sym_lo and if allow_negative is true. Note that
the address of sym_hi will be addr_hi+1, so we don't need any
additional code to fetch that address.

Here's a sample comparison function:
int syminfo_compare(const void *a, const void *b) {
  const struct syminfo *sym1 = a;
  const struct syminfo *sym2 = b;

  if (sym1->section_index > sym2->section_index)
    return 1;
  if (sym1->section_index < sym2->section_index)
    return -1;
  if ((sym1->sym_lo == MAX_SYM && sym2->sym_lo != 0) ||
      (sym2->sym_lo == MAX_SYM && sym1->sym_lo != 0)) {
    /* Overlap is equality - test for it */
    if (sym1->addr_hi >= sym2->addr_lo &&
        sym2->addr_hi >= sym1->addr_lo) {
      return 0;
    }
    /* No overlap, fall through */
  }
  if (sym1->addr_lo > sym2->addr_lo)
    return 1;
  if (sym1->addr_lo < sym2->addr_lo)
    return -1;
  /* Note that if we are comparing a lookup (MAX_SYM) with
     a placeholder (0), the lookup always compares greater.
     This causes us to search to the "right" of the placeholder
     for a match, which is what we want. */
  if (sym1->sym_lo > sym2->sym_lo)
    return 1;
  if (sym1->sym_lo < sym2->sym_lo)
    return -1;
  return 0;
}

So this greatly simplifies the back-end searching. It's a bsearch()
which gives you either a miss, or one or two alternatives for the result.
On the front end, you have an extra step after sorting which massages the
search array into the right configuration.  There's actually not much code
needed to do that.

Is that of interest?  The leveraging of bsearch() in that way?

A few other responses below...

On Sat, Sep 23, 2023 at 4:50 AM Masahiro Yamada <masahiroy@kernel.org> wrote:
> > +/* Populate the search array that we just allocated.
> > + * Be slightly paranoid here.  If the ELF file changes during processing,
>
> I could not understand. In which case, the ELF file changes?
>
> modpost loads the entire file to memory first..
>
> In which scenario, the memory content changes?
>

modpost doesn't load the entire file, it uses mmap to map it into the
address space.The easiest way to imagine this being an issue is that some
buggy parallelization happens and something is modifying vmlinux.o while
modpost is processing it.  Of course it's probably acceptable to say,
"Don't do that!"

There are two alternatives here: actually read in the entire file, which
is certainly suboptimal, or just live with the fact that mmap makes no
guarantees about whether changes in the file are reflected in the memory map.

> > +       /* A bit of paranoia; make sure that the end sentinel's address is
> > +        * different than its predecessor.  Not doing this could cause
> > +        * possible undefined behavior if anybody ever inserts a symbol
> > +        * with section_index and addr both at their max values.
>
> I could not understand this comment.
>
> If section_index and addr both at their max values at [table_size - 2],
> ->table[table_size - 2].addr + 1 wraps to zero.
>
> The table is not sorted any longer?
>

That's correct, the table would not be sorted any longer. But by design,
we never do a relational comparison against the end sentinel. But if
we rework the search, either by your suggestion or by using bsearch(),
this sentinel goes away.
Fangrui Song Sept. 24, 2023, 10:20 p.m. UTC | #5
On 2023-09-23, Masahiro Yamada wrote:
>On Tue, Sep 19, 2023 at 6:06 AM Jack Brennen <jbrennen@google.com> wrote:
>>
>> Modify modpost to use binary search for converting addresses back
>> into symbol references.  Previously it used linear search.
>>
>> This change saves a few seconds of wall time for defconfig builds,
>> but can save several minutes on allyesconfigs.
>
>Thanks.
>Binary search is a good idea.
>
>
>> Before:
>> $ make LLVM=1 -j128 allyesconfig vmlinux -s KCFLAGS="-Wno-error"
>>         Elapsed (wall clock) time (h:mm:ss or m:ss): 13:30.31
>
>Instead of the time for the entire build,
>can you put the time for the modpost command?
>
>If you allyesconfig case,
>
> $ time scripts/mod/modpost -M -m -a -N -o vmlinux.symvers vmlinux.o
>
>
>
>
>
>> diff --git a/scripts/mod/symsearch.c b/scripts/mod/symsearch.c
>> new file mode 100644
>> index 000000000000..aab79262512b
>> --- /dev/null
>> +++ b/scripts/mod/symsearch.c
>> @@ -0,0 +1,233 @@
>> +// SPDX-License-Identifier: GPL-2.0
>> +
>> +/* Helper functions for finding the symbol in an ELF which is "nearest"
>> + * to a given address.
>> + */
>>
>
>Can you use the following block comment style?
>
>/*
> * Helper functions for finding the symbol in an ELF which is "nearest"
> * to a given address.
> */
>
>
>
>> +#include "modpost.h"
>> +
>> +/* Struct used for binary search. */
>
>I think this obvious comment is unneeded.
>
>
>
>> +struct syminfo {
>> +       unsigned int symbol_index;
>> +       unsigned int section_index;
>> +       Elf_Addr addr;
>> +};
>> +
>> +/* Container used to hold an entire binary search table.
>> + * Entries in table are ascending, sorted first by section_index,
>> + * then by addr, and last by symbol_index.  The sorting by
>> + * symbol_index is used to duplicate the quirks of the prior
>> + * find_nearest_sym() function, where exact matches to an address
>> + * return the first symtab entry seen, but near misses return the
>> + * last symtab entry seen.
>
>Preserving this quirk makes the code complicated.
>
>I do not mind changing the behavior of the corner case.
>
>
>
>
>
>> + * The first and last entries of the table are sentinels and their
>> + * values only matter in two places:  when we sort the table, and
>> + * on lookups, the end sentinel should not have an addr field which
>> + * matches its immediate predecessor.  To meet these requirements,
>> + * we initialize them to (0,0,0) and (max,max,max), and then after
>> + * sorting, we tweak the end sentinel's addr field accordingly.
>> + */
>> +struct symsearch {
>> +       size_t table_size;
>> +       struct syminfo table[];
>> +};
>
>
>
>syminfo::symbol_index is unsigned int.
>symsearch::table_size is size_t.
>
>
>symbol_index of the last element is always larger than
>elf->symsearch->table_size.
>
>So, the code works only within 32-bit width anyway.
>
>
>
>
>
>
>
>
>
>
>
>
>> +
>> +static inline bool is_sym_searchable(struct elf_info *elf, Elf_Sym *sym)
>> +{
>> +       return is_valid_name(elf, sym) != 0;
>> +}
>
>If you call is_valid_name() directly, this function was unneeded?
>
>
>
>
>
>
>> +
>> +static int syminfo_compare(const void *s1, const void *s2)
>> +{
>> +       const struct syminfo *sym1 = s1;
>> +       const struct syminfo *sym2 = s2;
>> +
>> +       if (sym1->section_index > sym2->section_index)
>> +               return 1;
>> +       if (sym1->section_index < sym2->section_index)
>> +               return -1;
>> +       if (sym1->addr > sym2->addr)
>> +               return 1;
>> +       if (sym1->addr < sym2->addr)
>> +               return -1;
>> +       if (sym1->symbol_index > sym2->symbol_index)
>> +               return 1;
>> +       if (sym1->symbol_index < sym2->symbol_index)
>> +               return -1;
>> +       return 0;
>> +}
>> +
>> +static size_t symbol_count(struct elf_info *elf)
>> +{
>> +       size_t result = 0;
>> +
>> +       for (Elf_Sym *sym = elf->symtab_start; sym < elf->symtab_stop; sym++) {
>> +               if (is_sym_searchable(elf, sym))
>> +                       result++;
>> +       }
>> +       return result;
>> +}
>> +
>> +/* Populate the search array that we just allocated.
>> + * Be slightly paranoid here.  If the ELF file changes during processing,
>
>I could not understand. In which case, the ELF file changes?
>
>modpost loads the entire file to memory first..
>
>In which scenario, the memory content changes?
>
>
>
>
>
>
>> + * or if the behavior of is_sym_searchable() changes during processing,
>> + * we want to catch it; neither of those is acceptable.
>> + */
>> +static void symsearch_populate(struct elf_info *elf,
>> +                              struct syminfo *table,
>> +                              size_t table_size)
>> +{
>> +       bool is_arm = (elf->hdr->e_machine == EM_ARM);
>> +
>> +       /* Start sentinel */
>> +       if (table_size-- == 0)
>> +               fatal("%s: size mismatch\n", __func__);
>> +       table->symbol_index = 0;
>> +       table->section_index = 0;
>> +       table->addr = 0;
>> +       table++;
>> +
>> +       for (Elf_Sym *sym = elf->symtab_start; sym < elf->symtab_stop; sym++) {
>> +               if (is_sym_searchable(elf, sym)) {
>> +                       if (table_size-- == 0)
>> +                               fatal("%s: size mismatch\n", __func__);
>> +                       table->symbol_index = sym - elf->symtab_start;
>> +                       table->section_index = get_secindex(elf, sym);
>> +                       table->addr = sym->st_value;
>> +
>> +                       /*
>> +                        * For ARM Thumb instruction, the bit 0 of st_value is
>> +                        * set if the symbol is STT_FUNC type. Mask it to get
>> +                        * the address.
>> +                        */
>> +                       if (is_arm && ELF_ST_TYPE(sym->st_info) == STT_FUNC)
>> +                               table->addr &= ~1;
>> +
>> +                       table++;
>> +               }
>> +       }
>> +
>> +       /* End sentinel; all values are unsigned so -1 wraps to max */
>> +       if (table_size != 1)
>> +               fatal("%s: size mismatch\n", __func__);
>> +       table->symbol_index = -1;
>> +       table->section_index = -1;
>> +       table->addr = -1;
>> +}
>> +
>> +void symsearch_init(struct elf_info *elf)
>> +{
>> +       /* +2 here to allocate space for the start and end sentinels */
>> +       size_t table_size = symbol_count(elf) + 2;
>> +
>> +       elf->symsearch = NOFAIL(malloc(
>> +                                       sizeof(struct symsearch) +
>> +                                       sizeof(struct syminfo) * table_size));
>> +       elf->symsearch->table_size = table_size;
>> +
>> +       symsearch_populate(elf, elf->symsearch->table, table_size);
>> +       qsort(elf->symsearch->table, table_size,
>> +             sizeof(struct syminfo), syminfo_compare);
>> +
>> +       /* A bit of paranoia; make sure that the end sentinel's address is
>> +        * different than its predecessor.  Not doing this could cause
>> +        * possible undefined behavior if anybody ever inserts a symbol
>> +        * with section_index and addr both at their max values.
>
>I could not understand this comment.
>
>If section_index and addr both at their max values at [table_size - 2],
>->table[table_size - 2].addr + 1 wraps to zero.
>
>The table is not sorted any longer?
>
>
>
>
>> +        * Doing this little bit of defensive programming is more efficient
>> +        * than checking for array overruns later.
>> +        */
>> +       elf->symsearch->table[table_size - 1].addr =
>> +               elf->symsearch->table[table_size - 2].addr + 1;
>> +}
>> +
>> +void symsearch_finish(struct elf_info *elf)
>> +{
>> +       free(elf->symsearch);
>> +       elf->symsearch = NULL;
>> +}
>> +
>> +/* Find the syminfo which is in secndx and "nearest" to addr.
>> + * allow_negative: allow returning a symbol whose address is > addr.
>> + * min_distance: ignore symbols which are further away than this.
>> + *
>> + * Returns a nonzero index into the symsearch table for success.
>> + * Returns NULL if no legal symbol is found within the requested range.
>> + */
>> +static size_t symsearch_find_impl(struct elf_info *elf, Elf_Addr addr,
>> +                                 unsigned int secndx, bool allow_negative,
>> +                                 Elf_Addr min_distance)
>> +{
>> +       /* Find the target in the array; it will lie between two elements.
>> +        * Invariant here: table[lo] < target <= table[hi]
>> +        * For the purposes of search, exact hits in the search array are
>> +        * considered greater than the target.  This means that if we do
>> +        * get an exact hit, then once the search terminates, table[hi]
>> +        * will be the exact match which has the lowest symbol index.
>> +        */
>> +       struct syminfo *table = elf->symsearch->table;
>> +       size_t hi = elf->symsearch->table_size - 1;
>> +       size_t lo = 0;
>
>
>
>
>The binary search code was implemented in a too complex way
>to preserve the previous quirks.
>
>
>I want to use the same comparison function for
>qsort() and bsearch() to avoid paranoia.
>
>
>
>
>How about this implementation?
>
>
>
>static struct syminfo *symsearch_find_impl(struct elf_info *elf, Elf_Addr addr,
>                                           unsigned int secndx, bool
>allow_negative,
>                                           Elf_Addr min_distance)
>{
>        struct syminfo target = { .symbol_index = -1, .section_index =
>secndx, .addr = addr };
>        struct syminfo *table = elf->symsearch->table;
>        unsigned int hi = elf->symsearch->table_size - 1;
>        unsigned int lo = 0;
>        struct syminfo *result = NULL;
>        Elf_Addr distance;
>
>        while (lo < hi) {
>                unsigned int mid = (lo + hi + 1) / 2;
>
>                if (syminfo_compare(&table[mid], &target) > 0)
>                        hi = mid - 1;
>                else
>                        lo = mid;
>        }
>
>        /*
>         * The target resides between lo and (lo + 1).
>         * If allow_negative is true, check both of them.
>         */
>
>        if (allow_negative && lo + 1 < elf->symsearch->table_size &&
>            table[lo + 1].section_index == secndx) {
>                distance = table[lo + 1].addr - addr;
>                if (distance <= min_distance) {
>                        min_distance = distance;
>                        result = &table[lo + 1];
>                }
>        }
>
>        if (table[lo].section_index == secndx) {
>                distance = addr - table[lo].addr;
>                if (distance <= min_distance)
>                  result = &table[lo];
>        }
>
>        return result;
>}

I think this implementation (shrinking [lo,hi] to [lo,mid-1] or
[mid,hi]) is better than the original one (shrinking [lo,hi] to [lo,mid]
or [mid,hi], a bit wasteful).

The original patch uses `if (mid == lo) break;`, which I consider not so
elegant.

However, the `- 1` part in `unsigned int hi = elf->symsearch->table_size - 1;` can be improved.
I'd prefer an implementation similar to typical C++ https://en.cppreference.com/w/cpp/algorithm/upper_bound implementation.

lo = 0;
hi = n;  // or replace hi with count
while (lo < hi) {
   mid = (lo + hi) / 2;  // we don't care about (lo+hi) overflow
   if (less_or_eq(&table[mid], &target))
     lo = mid+1;
   else
     hi = mid;
}

// lo == hi: the index of the first element that is > target
// if elements equal to target are present, they are on the left of lo

>Elf_Sym *symsearch_find_nearest(struct elf_info *elf, Elf_Addr addr,
>                                unsigned int secndx, bool allow_negative,
>                                Elf_Addr min_distance)
>{
>        struct syminfo *result;
>
>        result = symsearch_find_impl(elf, addr, secndx,
>                                     allow_negative, min_distance);
>        if (!result)
>                return NULL;
>
>        return &elf->symtab_start[result->symbol_index];
>}
>
>
>
>This does not preserve the previous quirks.
>
>If there are multiple entries with the same address,
>it always returns the last element.
>
>I did not expect sentinels.
>
>I did not do thorough tests, but it seems to be working for me.
>
>
>
>
>Also, please call symsearch_find_nearest() directly
>and remove symfind_nearest_sym().
>
>
>
>
>
>
>--
>Best Regards
>
>Masahiro Yamada
>
Masahiro Yamada Sept. 25, 2023, 2:23 p.m. UTC | #6
On Sun, Sep 24, 2023 at 2:21 AM Jack Brennen <jbrennen@google.com> wrote:
>
> Mainly just clarifying that we're willing to change the behavior in corner
> cases?


Yes.

I do not see a sensible reason in the previous quirk
(take the first for the exact match, but the last for others).




> The existing behavior has one set of quirks about which symtab entry
> is returned, and your proposed behavior has another different set of quirks.
>
> It's probably OK to break builds which have an undocumented assumption
> about the order of symtab entries; personally, I'd rather not risk that myself,
> but if somebody with more experience is willing to back that decision, I'm
> OK with it.
>
> Also, there's an alternative approach that uses bsearch from the standard
> library and a common comparison function between qsort and bsearch. I
> considered this alternative earlier; maybe you would prefer it because it
> eliminates having to reimplement a binary search algorithm.
> I chose not to do it this way because of trying to duplicate the quirks.
> If no duplication of the quirks is needed, this becomes easier.
>
> The idea for that is to build a sorted array of syminfo that look like this:
>
> (section_index, addr_lo, addr_hi, sym_lo, sym_hi)
>
> What this represents is the situation where for any lookup in the
> range from (section_index, addr_lo) to (section_index, addr_hi)
> inclusive, the nearest symbol will be either sym_lo or sym_hi.
> There are four different meanings for (sym_lo, sym_hi):
>
> (sym_lo = 0)
> This is a placeholder for a duplicated address, and it cannot
> compare equal to anything. After we sort the array, we set all
> of the duplicated addresses except for the last one to sym_lo = 0.
>
> (sym_lo = MAX_SYM)
> This is used to designate an address being looked up. When this
> is seen, it compares equal to any other syminfo that has an
> overlapping range.
>
> (sym_lo != 0, sym_hi = 0)
> This represents the last range in a section. There's no following
> address that could match. Should also have addr_hi = MAX.
>
> (sym_lo != 0, sym_hi != 0)
> This represents a range in a section that's not the last range.
> sym_hi may be usable to satisfy the lookup, but only if it's
> closer than sym_lo and if allow_negative is true. Note that
> the address of sym_hi will be addr_hi+1, so we don't need any
> additional code to fetch that address.
>
> Here's a sample comparison function:
> int syminfo_compare(const void *a, const void *b) {
>   const struct syminfo *sym1 = a;
>   const struct syminfo *sym2 = b;
>
>   if (sym1->section_index > sym2->section_index)
>     return 1;
>   if (sym1->section_index < sym2->section_index)
>     return -1;
>   if ((sym1->sym_lo == MAX_SYM && sym2->sym_lo != 0) ||
>       (sym2->sym_lo == MAX_SYM && sym1->sym_lo != 0)) {
>     /* Overlap is equality - test for it */
>     if (sym1->addr_hi >= sym2->addr_lo &&
>         sym2->addr_hi >= sym1->addr_lo) {
>       return 0;
>     }
>     /* No overlap, fall through */
>   }
>   if (sym1->addr_lo > sym2->addr_lo)
>     return 1;
>   if (sym1->addr_lo < sym2->addr_lo)
>     return -1;
>   /* Note that if we are comparing a lookup (MAX_SYM) with
>      a placeholder (0), the lookup always compares greater.
>      This causes us to search to the "right" of the placeholder
>      for a match, which is what we want. */
>   if (sym1->sym_lo > sym2->sym_lo)
>     return 1;
>   if (sym1->sym_lo < sym2->sym_lo)
>     return -1;
>   return 0;
> }
>
> So this greatly simplifies the back-end searching. It's a bsearch()
> which gives you either a miss, or one or two alternatives for the result.
> On the front end, you have an extra step after sorting which massages the
> search array into the right configuration.  There's actually not much code
> needed to do that.
>
> Is that of interest?  The leveraging of bsearch() in that way?


I am curious about how to use bsearch() if the whole implementation
is short, but I could not understand that logic fully.




> A few other responses below...
>
> On Sat, Sep 23, 2023 at 4:50 AM Masahiro Yamada <masahiroy@kernel.org> wrote:
> > > +/* Populate the search array that we just allocated.
> > > + * Be slightly paranoid here.  If the ELF file changes during processing,
> >
> > I could not understand. In which case, the ELF file changes?
> >
> > modpost loads the entire file to memory first..
> >
> > In which scenario, the memory content changes?
> >
>
> modpost doesn't load the entire file, it uses mmap to map it into the
> address space.The easiest way to imagine this being an issue is that some
> buggy parallelization happens and something is modifying vmlinux.o while
> modpost is processing it.  Of course it's probably acceptable to say,
> "Don't do that!"
>
> There are two alternatives here: actually read in the entire file, which
> is certainly suboptimal, or just live with the fact that mmap makes no
> guarantees about whether changes in the file are reflected in the memory map.

You are right. I missed the fact that grab_file() used mmap.

I am OK with the careful check.

Yet another possible alternative is, as Nick suggested, cut the first loop.
Use realloc() to grow the array as needed, but this is also suboptimal
if memory copy occurs.




> > > +       /* A bit of paranoia; make sure that the end sentinel's address is
> > > +        * different than its predecessor.  Not doing this could cause
> > > +        * possible undefined behavior if anybody ever inserts a symbol
> > > +        * with section_index and addr both at their max values.
> >
> > I could not understand this comment.
> >
> > If section_index and addr both at their max values at [table_size - 2],
> > ->table[table_size - 2].addr + 1 wraps to zero.
> >
> > The table is not sorted any longer?
> >
>
> That's correct, the table would not be sorted any longer. But by design,
> we never do a relational comparison against the end sentinel. But if
> we rework the search, either by your suggestion or by using bsearch(),
> this sentinel goes away.

Understood.
With mid = lo + (hi - lo) / 2,
the last sentinel is never set as mid.



--
Best Regards
Masahiro Yamada
Masahiro Yamada Sept. 25, 2023, 2:24 p.m. UTC | #7
On Mon, Sep 25, 2023 at 7:20 AM Fangrui Song <maskray@google.com> wrote:

> However, the `- 1` part in `unsigned int hi = elf->symsearch->table_size - 1;` can be improved.
> I'd prefer an implementation similar to typical C++ https://en.cppreference.com/w/cpp/algorithm/upper_bound implementation.
>
> lo = 0;
> hi = n;  // or replace hi with count
> while (lo < hi) {
>    mid = (lo + hi) / 2;  // we don't care about (lo+hi) overflow
>    if (less_or_eq(&table[mid], &target))
>      lo = mid+1;
>    else
>      hi = mid;
> }
>
> // lo == hi: the index of the first element that is > target
> // if elements equal to target are present, they are on the left of lo


Ah, it is much more elegant!

Thanks.
diff mbox series

Patch

diff --git a/scripts/mod/Makefile b/scripts/mod/Makefile
index c9e38ad937fd..3c54125eb373 100644
--- a/scripts/mod/Makefile
+++ b/scripts/mod/Makefile
@@ -5,7 +5,7 @@  CFLAGS_REMOVE_empty.o += $(CC_FLAGS_LTO)
 hostprogs-always-y	+= modpost mk_elfconfig
 always-y		+= empty.o
 
-modpost-objs	:= modpost.o file2alias.o sumversion.o
+modpost-objs	:= modpost.o file2alias.o sumversion.o symsearch.o
 
 devicetable-offsets-file := devicetable-offsets.h
 
@@ -16,7 +16,7 @@  targets += $(devicetable-offsets-file) devicetable-offsets.s
 
 # dependencies on generated files need to be listed explicitly
 
-$(obj)/modpost.o $(obj)/file2alias.o $(obj)/sumversion.o: $(obj)/elfconfig.h
+$(obj)/modpost.o $(obj)/file2alias.o $(obj)/sumversion.o $(obj)/symsearch.o: $(obj)/elfconfig.h
 $(obj)/file2alias.o: $(obj)/$(devicetable-offsets-file)
 
 quiet_cmd_elfconfig = MKELF   $@
diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index de499dce5265..975f235aca2c 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -22,7 +22,6 @@ 
 #include <errno.h>
 #include "modpost.h"
 #include "../../include/linux/license.h"
-#include "../../include/linux/module_symbol.h"
 
 static bool module_enabled;
 /* Are we using CONFIG_MODVERSIONS? */
@@ -577,11 +576,14 @@  static int parse_elf(struct elf_info *info, const char *filename)
 			*p = TO_NATIVE(*p);
 	}
 
+	symsearch_init(info);
+
 	return 1;
 }
 
 static void parse_elf_finish(struct elf_info *info)
 {
+	symsearch_finish(info);
 	release_file(info->hdr, info->size);
 }
 
@@ -1039,65 +1041,13 @@  static int secref_whitelist(const char *fromsec, const char *fromsym,
 	return 1;
 }
 
-/*
- * If there's no name there, ignore it; likewise, ignore it if it's
- * one of the magic symbols emitted used by current tools.
- *
- * Otherwise if find_symbols_between() returns those symbols, they'll
- * fail the whitelist tests and cause lots of false alarms ... fixable
- * only by merging __exit and __init sections into __text, bloating
- * the kernel (which is especially evil on embedded platforms).
- */
-static inline int is_valid_name(struct elf_info *elf, Elf_Sym *sym)
-{
-	const char *name = elf->strtab + sym->st_name;
-
-	if (!name || !strlen(name))
-		return 0;
-	return !is_mapping_symbol(name);
-}
-
 /* Look up the nearest symbol based on the section and the address */
 static Elf_Sym *find_nearest_sym(struct elf_info *elf, Elf_Addr addr,
 				 unsigned int secndx, bool allow_negative,
 				 Elf_Addr min_distance)
 {
-	Elf_Sym *sym;
-	Elf_Sym *near = NULL;
-	Elf_Addr sym_addr, distance;
-	bool is_arm = (elf->hdr->e_machine == EM_ARM);
-
-	for (sym = elf->symtab_start; sym < elf->symtab_stop; sym++) {
-		if (get_secindex(elf, sym) != secndx)
-			continue;
-		if (!is_valid_name(elf, sym))
-			continue;
-
-		sym_addr = sym->st_value;
-
-		/*
-		 * For ARM Thumb instruction, the bit 0 of st_value is set
-		 * if the symbol is STT_FUNC type. Mask it to get the address.
-		 */
-		if (is_arm && ELF_ST_TYPE(sym->st_info) == STT_FUNC)
-			 sym_addr &= ~1;
-
-		if (addr >= sym_addr)
-			distance = addr - sym_addr;
-		else if (allow_negative)
-			distance = sym_addr - addr;
-		else
-			continue;
-
-		if (distance <= min_distance) {
-			min_distance = distance;
-			near = sym;
-		}
-
-		if (min_distance == 0)
-			break;
-	}
-	return near;
+	return symsearch_find_nearest(elf, addr, secndx,
+				      allow_negative, min_distance);
 }
 
 static Elf_Sym *find_fromsym(struct elf_info *elf, Elf_Addr addr,
diff --git a/scripts/mod/modpost.h b/scripts/mod/modpost.h
index 5f94c2c9f2d9..6413f26fcb6b 100644
--- a/scripts/mod/modpost.h
+++ b/scripts/mod/modpost.h
@@ -10,6 +10,7 @@ 
 #include <fcntl.h>
 #include <unistd.h>
 #include <elf.h>
+#include "../../include/linux/module_symbol.h"
 
 #include "list.h"
 #include "elfconfig.h"
@@ -128,6 +129,8 @@  struct elf_info {
 	 * take shndx from symtab_shndx_start[N] instead */
 	Elf32_Word   *symtab_shndx_start;
 	Elf32_Word   *symtab_shndx_stop;
+
+	struct symsearch *symsearch;
 };
 
 /* Accessor for sym->st_shndx, hides ugliness of "64k sections" */
@@ -154,6 +157,28 @@  static inline unsigned int get_secindex(const struct elf_info *info,
 	return index;
 }
 
+/*
+ * If there's no name there, ignore it; likewise, ignore it if it's
+ * one of the magic symbols emitted used by current tools.
+ *
+ * Internal symbols created by tools should be ignored by modpost.
+ */
+static inline int is_valid_name(struct elf_info *elf, Elf_Sym *sym)
+{
+	const char *name = elf->strtab + sym->st_name;
+
+	if (!name || !strlen(name))
+		return 0;
+	return !is_mapping_symbol(name);
+}
+
+/* symsearch.c */
+void symsearch_init(struct elf_info *elf);
+void symsearch_finish(struct elf_info *elf);
+Elf_Sym *symsearch_find_nearest(struct elf_info *elf, Elf_Addr addr,
+				unsigned int secndx, bool allow_negative,
+				Elf_Addr min_distance);
+
 /* file2alias.c */
 void handle_moddevtable(struct module *mod, struct elf_info *info,
 			Elf_Sym *sym, const char *symname);
diff --git a/scripts/mod/symsearch.c b/scripts/mod/symsearch.c
new file mode 100644
index 000000000000..aab79262512b
--- /dev/null
+++ b/scripts/mod/symsearch.c
@@ -0,0 +1,233 @@ 
+// SPDX-License-Identifier: GPL-2.0
+
+/* Helper functions for finding the symbol in an ELF which is "nearest"
+ * to a given address.
+ */
+
+#include "modpost.h"
+
+/* Struct used for binary search. */
+struct syminfo {
+	unsigned int symbol_index;
+	unsigned int section_index;
+	Elf_Addr addr;
+};
+
+/* Container used to hold an entire binary search table.
+ * Entries in table are ascending, sorted first by section_index,
+ * then by addr, and last by symbol_index.  The sorting by
+ * symbol_index is used to duplicate the quirks of the prior
+ * find_nearest_sym() function, where exact matches to an address
+ * return the first symtab entry seen, but near misses return the
+ * last symtab entry seen.
+ * The first and last entries of the table are sentinels and their
+ * values only matter in two places:  when we sort the table, and
+ * on lookups, the end sentinel should not have an addr field which
+ * matches its immediate predecessor.  To meet these requirements,
+ * we initialize them to (0,0,0) and (max,max,max), and then after
+ * sorting, we tweak the end sentinel's addr field accordingly.
+ */
+struct symsearch {
+	size_t table_size;
+	struct syminfo table[];
+};
+
+static inline bool is_sym_searchable(struct elf_info *elf, Elf_Sym *sym)
+{
+	return is_valid_name(elf, sym) != 0;
+}
+
+static int syminfo_compare(const void *s1, const void *s2)
+{
+	const struct syminfo *sym1 = s1;
+	const struct syminfo *sym2 = s2;
+
+	if (sym1->section_index > sym2->section_index)
+		return 1;
+	if (sym1->section_index < sym2->section_index)
+		return -1;
+	if (sym1->addr > sym2->addr)
+		return 1;
+	if (sym1->addr < sym2->addr)
+		return -1;
+	if (sym1->symbol_index > sym2->symbol_index)
+		return 1;
+	if (sym1->symbol_index < sym2->symbol_index)
+		return -1;
+	return 0;
+}
+
+static size_t symbol_count(struct elf_info *elf)
+{
+	size_t result = 0;
+
+	for (Elf_Sym *sym = elf->symtab_start; sym < elf->symtab_stop; sym++) {
+		if (is_sym_searchable(elf, sym))
+			result++;
+	}
+	return result;
+}
+
+/* Populate the search array that we just allocated.
+ * Be slightly paranoid here.  If the ELF file changes during processing,
+ * or if the behavior of is_sym_searchable() changes during processing,
+ * we want to catch it; neither of those is acceptable.
+ */
+static void symsearch_populate(struct elf_info *elf,
+			       struct syminfo *table,
+			       size_t table_size)
+{
+	bool is_arm = (elf->hdr->e_machine == EM_ARM);
+
+	/* Start sentinel */
+	if (table_size-- == 0)
+		fatal("%s: size mismatch\n", __func__);
+	table->symbol_index = 0;
+	table->section_index = 0;
+	table->addr = 0;
+	table++;
+
+	for (Elf_Sym *sym = elf->symtab_start; sym < elf->symtab_stop; sym++) {
+		if (is_sym_searchable(elf, sym)) {
+			if (table_size-- == 0)
+				fatal("%s: size mismatch\n", __func__);
+			table->symbol_index = sym - elf->symtab_start;
+			table->section_index = get_secindex(elf, sym);
+			table->addr = sym->st_value;
+
+			/*
+			 * For ARM Thumb instruction, the bit 0 of st_value is
+			 * set if the symbol is STT_FUNC type. Mask it to get
+			 * the address.
+			 */
+			if (is_arm && ELF_ST_TYPE(sym->st_info) == STT_FUNC)
+				table->addr &= ~1;
+
+			table++;
+		}
+	}
+
+	/* End sentinel; all values are unsigned so -1 wraps to max */
+	if (table_size != 1)
+		fatal("%s: size mismatch\n", __func__);
+	table->symbol_index = -1;
+	table->section_index = -1;
+	table->addr = -1;
+}
+
+void symsearch_init(struct elf_info *elf)
+{
+	/* +2 here to allocate space for the start and end sentinels */
+	size_t table_size = symbol_count(elf) + 2;
+
+	elf->symsearch = NOFAIL(malloc(
+					sizeof(struct symsearch) +
+					sizeof(struct syminfo) * table_size));
+	elf->symsearch->table_size = table_size;
+
+	symsearch_populate(elf, elf->symsearch->table, table_size);
+	qsort(elf->symsearch->table, table_size,
+	      sizeof(struct syminfo), syminfo_compare);
+
+	/* A bit of paranoia; make sure that the end sentinel's address is
+	 * different than its predecessor.  Not doing this could cause
+	 * possible undefined behavior if anybody ever inserts a symbol
+	 * with section_index and addr both at their max values.
+	 * Doing this little bit of defensive programming is more efficient
+	 * than checking for array overruns later.
+	 */
+	elf->symsearch->table[table_size - 1].addr =
+		elf->symsearch->table[table_size - 2].addr + 1;
+}
+
+void symsearch_finish(struct elf_info *elf)
+{
+	free(elf->symsearch);
+	elf->symsearch = NULL;
+}
+
+/* Find the syminfo which is in secndx and "nearest" to addr.
+ * allow_negative: allow returning a symbol whose address is > addr.
+ * min_distance: ignore symbols which are further away than this.
+ *
+ * Returns a nonzero index into the symsearch table for success.
+ * Returns NULL if no legal symbol is found within the requested range.
+ */
+static size_t symsearch_find_impl(struct elf_info *elf, Elf_Addr addr,
+				  unsigned int secndx, bool allow_negative,
+				  Elf_Addr min_distance)
+{
+	/* Find the target in the array; it will lie between two elements.
+	 * Invariant here: table[lo] < target <= table[hi]
+	 * For the purposes of search, exact hits in the search array are
+	 * considered greater than the target.	This means that if we do
+	 * get an exact hit, then once the search terminates, table[hi]
+	 * will be the exact match which has the lowest symbol index.
+	 */
+	struct syminfo *table = elf->symsearch->table;
+	size_t hi = elf->symsearch->table_size - 1;
+	size_t lo = 0;
+	bool hi_is_usable = false;
+	bool lo_is_usable = false;
+	Elf_Addr hi_distance = -1;  // max Elf_Addr
+	Elf_Addr lo_distance = -1;  // max Elf_Addr
+	Elf_Addr min_distance_lo = min_distance;
+	Elf_Addr min_distance_hi = allow_negative ? min_distance : 0;
+
+	for (;;) {
+		size_t mid;
+
+		mid = lo + (hi - lo) / 2;
+		if (mid == lo)
+			break;
+		if (secndx > table[mid].section_index) {
+			lo = mid;
+		} else if (secndx < table[mid].section_index) {
+			hi = mid;
+		} else if (addr > table[mid].addr) {
+			lo = mid;
+			lo_distance = addr - table[mid].addr;
+			lo_is_usable = (lo_distance <= min_distance_lo);
+		} else {
+			hi = mid;
+			hi_distance = table[mid].addr - addr;
+			hi_is_usable = (hi_distance <= min_distance_hi);
+		}
+	}
+
+	if (hi_is_usable && lo_is_usable) {
+		lo_is_usable = (lo_distance <= hi_distance);
+		hi_is_usable = (hi_distance <= lo_distance);
+	}
+
+	if (!hi_is_usable)
+		return lo_is_usable ? lo : 0;
+
+	if (hi_distance == 0)
+		return hi;
+
+	/* Match quirks of existing behavior.  Advance hi to the last
+	 * matching entry in the search table.	We don't need to worry
+	 * about running off the end of the array due to the sentinel.
+	 */
+	while (table[hi+1].addr == table[hi].addr &&
+	       table[hi+1].section_index == table[hi].section_index) {
+		hi++;
+	}
+
+	return (lo_is_usable &&
+		table[lo].symbol_index > table[hi].symbol_index) ? lo : hi;
+}
+
+Elf_Sym *symsearch_find_nearest(struct elf_info *elf, Elf_Addr addr,
+				unsigned int secndx, bool allow_negative,
+				Elf_Addr min_distance)
+{
+	size_t result = symsearch_find_impl(elf, addr, secndx,
+					    allow_negative, min_distance);
+
+	if (result == 0)
+		return NULL;
+
+	return &elf->symtab_start[elf->symsearch->table[result].symbol_index];
+}