diff mbox series

[PATCHv3,bpf-next,09/26] libbpf: Add elf symbol iterator

Message ID 20230630083344.984305-10-jolsa@kernel.org (mailing list archive)
State Changes Requested
Delegated to: BPF
Headers show
Series bpf: Add multi uprobe link | expand

Checks

Context Check Description
netdev/series_format fail Series longer than 15 patches (and no cover letter)
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 8 this patch: 8
netdev/cc_maintainers warning 3 maintainers not CCed: kpsingh@kernel.org martin.lau@linux.dev song@kernel.org
netdev/build_clang fail Errors and warnings before: 18 this patch: 18
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 8 this patch: 8
netdev/checkpatch warning WARNING: line length of 108 exceeds 80 columns WARNING: line length of 87 exceeds 80 columns WARNING: line length of 94 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
bpf/vmtest-bpf-next-PR success PR summary
bpf/vmtest-bpf-next-VM_Test-1 success Logs for ${{ matrix.test }} on ${{ matrix.arch }} with ${{ matrix.toolchain_full }}
bpf/vmtest-bpf-next-VM_Test-2 success Logs for ShellCheck
bpf/vmtest-bpf-next-VM_Test-3 success Logs for build for aarch64 with gcc
bpf/vmtest-bpf-next-VM_Test-4 success Logs for build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-5 success Logs for build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-6 fail Logs for build for x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-7 success Logs for set-matrix
bpf/vmtest-bpf-next-VM_Test-8 success Logs for veristat

Commit Message

Jiri Olsa June 30, 2023, 8:33 a.m. UTC
Adding elf symbol iterator object (and some functions) that follow
open-coded iterator pattern and some functions to ease up iterating
elf object symbols.

The idea is to iterate single symbol section with:

  struct elf_sym_iter iter;
  struct elf_sym *sym;

  if (elf_sym_iter_new(&iter, elf, binary_path, SHT_DYNSYM))
        goto error;

  while ((sym = elf_sym_iter_next(&iter))) {
        ...
  }

I considered opening the elf inside the iterator and iterate all symbol
sections, but then it gets more complicated wrt user checks for when
the next section is processed.

Plus side is the we don't need 'exit' function, because caller/user is
in charge of that.

The returned iterated symbol object from elf_sym_iter_next function
is placed inside the struct elf_sym_iter, so no extra allocation or
argument is needed.

Suggested-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
---
 tools/lib/bpf/elf.c | 178 +++++++++++++++++++++++++++++---------------
 1 file changed, 117 insertions(+), 61 deletions(-)

Comments

Andrii Nakryiko July 6, 2023, 11:24 p.m. UTC | #1
On Fri, Jun 30, 2023 at 1:35 AM Jiri Olsa <jolsa@kernel.org> wrote:
>
> Adding elf symbol iterator object (and some functions) that follow
> open-coded iterator pattern and some functions to ease up iterating
> elf object symbols.
>
> The idea is to iterate single symbol section with:
>
>   struct elf_sym_iter iter;
>   struct elf_sym *sym;
>
>   if (elf_sym_iter_new(&iter, elf, binary_path, SHT_DYNSYM))
>         goto error;
>
>   while ((sym = elf_sym_iter_next(&iter))) {
>         ...
>   }
>
> I considered opening the elf inside the iterator and iterate all symbol
> sections, but then it gets more complicated wrt user checks for when
> the next section is processed.
>
> Plus side is the we don't need 'exit' function, because caller/user is
> in charge of that.
>
> The returned iterated symbol object from elf_sym_iter_next function
> is placed inside the struct elf_sym_iter, so no extra allocation or
> argument is needed.
>
> Suggested-by: Andrii Nakryiko <andrii@kernel.org>
> Signed-off-by: Jiri Olsa <jolsa@kernel.org>
> ---
>  tools/lib/bpf/elf.c | 178 +++++++++++++++++++++++++++++---------------
>  1 file changed, 117 insertions(+), 61 deletions(-)
>

A bunch of nits, but overall looks good. Please address nits, and add my ack

Acked-by: Andrii Nakryiko <andrii@kernel.org>

> diff --git a/tools/lib/bpf/elf.c b/tools/lib/bpf/elf.c
> index 74e35071d22e..fcce4bd2478f 100644
> --- a/tools/lib/bpf/elf.c
> +++ b/tools/lib/bpf/elf.c
> @@ -59,6 +59,108 @@ static Elf_Scn *elf_find_next_scn_by_type(Elf *elf, int sh_type, Elf_Scn *scn)
>         return NULL;
>  }
>
> +struct elf_sym {
> +       const char *name;
> +       GElf_Sym sym;
> +       GElf_Shdr sh;
> +};
> +

if we want to use elf_sym_iter outside of elf.c, this should be in
libbpf_internal.h?

> +struct elf_sym_iter {
> +       Elf *elf;
> +       Elf_Data *syms;
> +       size_t nr_syms;
> +       size_t strtabidx;
> +       size_t next_sym_idx;
> +       struct elf_sym sym;
> +       int st_type;
> +};
> +
> +static int elf_sym_iter_new(struct elf_sym_iter *iter,
> +                           Elf *elf, const char *binary_path,
> +                           int sh_type, int st_type)
> +{
> +       Elf_Scn *scn = NULL;
> +       GElf_Ehdr ehdr;
> +       GElf_Shdr sh;
> +
> +       memset(iter, 0, sizeof(*iter));
> +
> +       if (!gelf_getehdr(elf, &ehdr)) {
> +               pr_warn("elf: failed to get ehdr from %s: %s\n", binary_path, elf_errmsg(-1));
> +               return -EINVAL;
> +       }
> +
> +       scn = elf_find_next_scn_by_type(elf, sh_type, NULL);
> +       if (!scn) {
> +               pr_debug("elf: failed to find symbol table ELF sections in '%s'\n",
> +                        binary_path);
> +               return -ENOENT;
> +       }
> +
> +       if (!gelf_getshdr(scn, &sh))
> +               return -EINVAL;
> +
> +       iter->strtabidx = sh.sh_link;
> +       iter->syms = elf_getdata(scn, 0);
> +       if (!iter->syms) {
> +               pr_warn("elf: failed to get symbols for symtab section in '%s': %s\n",
> +                       binary_path, elf_errmsg(-1));
> +               return -EINVAL;
> +       }
> +       iter->nr_syms = iter->syms->d_size / sh.sh_entsize;
> +       iter->elf = elf;
> +       iter->st_type = st_type;
> +       return 0;
> +}
> +
> +static struct elf_sym *elf_sym_iter_next(struct elf_sym_iter *iter)
> +{
> +       struct elf_sym *ret = &iter->sym;
> +       GElf_Sym *sym = &ret->sym;
> +       const char *name = NULL;
> +       Elf_Scn *sym_scn;
> +       size_t idx;
> +
> +       for (idx = iter->next_sym_idx; idx < iter->nr_syms; idx++) {
> +               if (!gelf_getsym(iter->syms, idx, sym))
> +                       continue;
> +               if (GELF_ST_TYPE(sym->st_info) != iter->st_type)
> +                       continue;
> +               name = elf_strptr(iter->elf, iter->strtabidx, sym->st_name);
> +               if (!name)
> +                       continue;
> +
> +               /* Transform symbol's virtual address (absolute for
> +                * binaries and relative for shared libs) into file
> +                * offset, which is what kernel is expecting for
> +                * uprobe/uretprobe attachment.
> +                * See Documentation/trace/uprobetracer.rst for more
> +                * details.
> +                * This is done by looking up symbol's containing
> +                * section's header and using iter's virtual address
> +                * (sh_addr) and corresponding file offset (sh_offset)
> +                * to transform sym.st_value (virtual address) into
> +                * desired final file offset.
> +                */

this comment is misplaced? we don't do the translation here

> +               sym_scn = elf_getscn(iter->elf, sym->st_shndx);
> +               if (!sym_scn)
> +                       continue;
> +               if (!gelf_getshdr(sym_scn, &ret->sh))
> +                       continue;
> +
> +               iter->next_sym_idx = idx + 1;
> +               ret->name = name;
> +               return ret;
> +       }
> +
> +       return NULL;
> +}
> +
> +static unsigned long elf_sym_offset(struct elf_sym *sym)
> +{
> +       return sym->sym.st_value - sym->sh.sh_addr + sym->sh.sh_offset;
> +}
> +
>  /* Find offset of function name in the provided ELF object. "binary_path" is
>   * the path to the ELF binary represented by "elf", and only used for error
>   * reporting matters. "name" matches symbol name or name@@LIB for library
> @@ -90,64 +192,36 @@ long elf_find_func_offset(Elf *elf, const char *binary_path, const char *name)
>          * reported as a warning/error.
>          */
>         for (i = 0; i < ARRAY_SIZE(sh_types); i++) {
> -               size_t nr_syms, strtabidx, idx;
> -               Elf_Data *symbols = NULL;
> -               Elf_Scn *scn = NULL;
> +               struct elf_sym_iter iter;
> +               struct elf_sym *sym;
>                 int last_bind = -1;
> -               const char *sname;
> -               GElf_Shdr sh;
> +               int curr_bind;

OCD nit:

$ rg 'curr(_|\b)' | wc -l
8
$ rg 'cur(_|\b)' | wc -l
148

and those 8 I consider an unfortunate accident ;) let's standardize on
using "cur" consistently

>
> -               scn = elf_find_next_scn_by_type(elf, sh_types[i], NULL);
> -               if (!scn) {
> -                       pr_debug("elf: failed to find symbol table ELF sections in '%s'\n",
> -                                binary_path);
> -                       continue;
> -               }
> -               if (!gelf_getshdr(scn, &sh))
> -                       continue;
> -               strtabidx = sh.sh_link;
> -               symbols = elf_getdata(scn, 0);
> -               if (!symbols) {
> -                       pr_warn("elf: failed to get symbols for symtab section in '%s': %s\n",
> -                               binary_path, elf_errmsg(-1));
> -                       ret = -LIBBPF_ERRNO__FORMAT;
> +               ret = elf_sym_iter_new(&iter, elf, binary_path, sh_types[i], STT_FUNC);
> +               if (ret) {
> +                       if (ret == -ENOENT)
> +                               continue;
>                         goto out;

another styling nit: let's avoid unnecessary nesting of ifs:

if (ret == -ENOENT)
    continue;
if (ret)
    goto out;

simple and clean


>                 }
> -               nr_syms = symbols->d_size / sh.sh_entsize;
> -
> -               for (idx = 0; idx < nr_syms; idx++) {
> -                       int curr_bind;
> -                       GElf_Sym sym;
> -                       Elf_Scn *sym_scn;
> -                       GElf_Shdr sym_sh;
> -
> -                       if (!gelf_getsym(symbols, idx, &sym))
> -                               continue;
> -
> -                       if (GELF_ST_TYPE(sym.st_info) != STT_FUNC)
> -                               continue;
> -
> -                       sname = elf_strptr(elf, strtabidx, sym.st_name);
> -                       if (!sname)
> -                               continue;
> -
> -                       curr_bind = GELF_ST_BIND(sym.st_info);
>
> +               while ((sym = elf_sym_iter_next(&iter))) {
>                         /* User can specify func, func@@LIB or func@@LIB_VERSION. */
> -                       if (strncmp(sname, name, name_len) != 0)
> +                       if (strncmp(sym->name, name, name_len) != 0)
>                                 continue;
>                         /* ...but we don't want a search for "foo" to match 'foo2" also, so any
>                          * additional characters in sname should be of the form "@@LIB".
>                          */
> -                       if (!is_name_qualified && sname[name_len] != '\0' && sname[name_len] != '@')
> +                       if (!is_name_qualified && sym->name[name_len] != '\0' && sym->name[name_len] != '@')
>                                 continue;
>
> -                       if (ret >= 0) {
> +                       curr_bind = GELF_ST_BIND(sym->sym.st_info);
> +
> +                       if (ret > 0) {

used to be >=, why the change?

>                                 /* handle multiple matches */
>                                 if (last_bind != STB_WEAK && curr_bind != STB_WEAK) {
>                                         /* Only accept one non-weak bind. */
>                                         pr_warn("elf: ambiguous match for '%s', '%s' in '%s'\n",
> -                                               sname, name, binary_path);
> +                                               sym->name, name, binary_path);
>                                         ret = -LIBBPF_ERRNO__FORMAT;
>                                         goto out;
>                                 } else if (curr_bind == STB_WEAK) {
> @@ -158,25 +232,7 @@ long elf_find_func_offset(Elf *elf, const char *binary_path, const char *name)
>                                 }
>                         }
>
> -                       /* Transform symbol's virtual address (absolute for
> -                        * binaries and relative for shared libs) into file
> -                        * offset, which is what kernel is expecting for
> -                        * uprobe/uretprobe attachment.
> -                        * See Documentation/trace/uprobetracer.rst for more
> -                        * details.
> -                        * This is done by looking up symbol's containing
> -                        * section's header and using it's virtual address
> -                        * (sh_addr) and corresponding file offset (sh_offset)
> -                        * to transform sym.st_value (virtual address) into
> -                        * desired final file offset.
> -                        */
> -                       sym_scn = elf_getscn(elf, sym.st_shndx);
> -                       if (!sym_scn)
> -                               continue;
> -                       if (!gelf_getshdr(sym_scn, &sym_sh))
> -                               continue;
> -
> -                       ret = sym.st_value - sym_sh.sh_addr + sym_sh.sh_offset;
> +                       ret = elf_sym_offset(sym);
>                         last_bind = curr_bind;
>                 }
>                 if (ret > 0)
> --
> 2.41.0
>
Jiri Olsa July 11, 2023, 9:03 a.m. UTC | #2
On Thu, Jul 06, 2023 at 04:24:48PM -0700, Andrii Nakryiko wrote:

SNIP

> > Suggested-by: Andrii Nakryiko <andrii@kernel.org>
> > Signed-off-by: Jiri Olsa <jolsa@kernel.org>
> > ---
> >  tools/lib/bpf/elf.c | 178 +++++++++++++++++++++++++++++---------------
> >  1 file changed, 117 insertions(+), 61 deletions(-)
> >
> 
> A bunch of nits, but overall looks good. Please address nits, and add my ack
> 
> Acked-by: Andrii Nakryiko <andrii@kernel.org>
> 
> > diff --git a/tools/lib/bpf/elf.c b/tools/lib/bpf/elf.c
> > index 74e35071d22e..fcce4bd2478f 100644
> > --- a/tools/lib/bpf/elf.c
> > +++ b/tools/lib/bpf/elf.c
> > @@ -59,6 +59,108 @@ static Elf_Scn *elf_find_next_scn_by_type(Elf *elf, int sh_type, Elf_Scn *scn)
> >         return NULL;
> >  }
> >
> > +struct elf_sym {
> > +       const char *name;
> > +       GElf_Sym sym;
> > +       GElf_Shdr sh;
> > +};
> > +
> 
> if we want to use elf_sym_iter outside of elf.c, this should be in
> libbpf_internal.h?

yes eventually, but all the helper functions using elf_sym_iter that
I added later are in elf.c, so there's no need atm

SNIP

> > +
> > +static struct elf_sym *elf_sym_iter_next(struct elf_sym_iter *iter)
> > +{
> > +       struct elf_sym *ret = &iter->sym;
> > +       GElf_Sym *sym = &ret->sym;
> > +       const char *name = NULL;
> > +       Elf_Scn *sym_scn;
> > +       size_t idx;
> > +
> > +       for (idx = iter->next_sym_idx; idx < iter->nr_syms; idx++) {
> > +               if (!gelf_getsym(iter->syms, idx, sym))
> > +                       continue;
> > +               if (GELF_ST_TYPE(sym->st_info) != iter->st_type)
> > +                       continue;
> > +               name = elf_strptr(iter->elf, iter->strtabidx, sym->st_name);
> > +               if (!name)
> > +                       continue;
> > +
> > +               /* Transform symbol's virtual address (absolute for
> > +                * binaries and relative for shared libs) into file
> > +                * offset, which is what kernel is expecting for
> > +                * uprobe/uretprobe attachment.
> > +                * See Documentation/trace/uprobetracer.rst for more
> > +                * details.
> > +                * This is done by looking up symbol's containing
> > +                * section's header and using iter's virtual address
> > +                * (sh_addr) and corresponding file offset (sh_offset)
> > +                * to transform sym.st_value (virtual address) into
> > +                * desired final file offset.
> > +                */
> 
> this comment is misplaced? we don't do the translation here

right, should be placed at the elf_sym_offset function

> 
> > +               sym_scn = elf_getscn(iter->elf, sym->st_shndx);
> > +               if (!sym_scn)
> > +                       continue;
> > +               if (!gelf_getshdr(sym_scn, &ret->sh))
> > +                       continue;
> > +
> > +               iter->next_sym_idx = idx + 1;
> > +               ret->name = name;
> > +               return ret;
> > +       }
> > +
> > +       return NULL;
> > +}
> > +
> > +static unsigned long elf_sym_offset(struct elf_sym *sym)
> > +{
> > +       return sym->sym.st_value - sym->sh.sh_addr + sym->sh.sh_offset;
> > +}
> > +
> >  /* Find offset of function name in the provided ELF object. "binary_path" is
> >   * the path to the ELF binary represented by "elf", and only used for error
> >   * reporting matters. "name" matches symbol name or name@@LIB for library
> > @@ -90,64 +192,36 @@ long elf_find_func_offset(Elf *elf, const char *binary_path, const char *name)
> >          * reported as a warning/error.
> >          */
> >         for (i = 0; i < ARRAY_SIZE(sh_types); i++) {
> > -               size_t nr_syms, strtabidx, idx;
> > -               Elf_Data *symbols = NULL;
> > -               Elf_Scn *scn = NULL;
> > +               struct elf_sym_iter iter;
> > +               struct elf_sym *sym;
> >                 int last_bind = -1;
> > -               const char *sname;
> > -               GElf_Shdr sh;
> > +               int curr_bind;
> 
> OCD nit:
> 
> $ rg 'curr(_|\b)' | wc -l
> 8
> $ rg 'cur(_|\b)' | wc -l
> 148
> 
> and those 8 I consider an unfortunate accident ;) let's standardize on
> using "cur" consistently

ok.. probably easier to make that change than treat ocd ;-)

> 
> >
> > -               scn = elf_find_next_scn_by_type(elf, sh_types[i], NULL);
> > -               if (!scn) {
> > -                       pr_debug("elf: failed to find symbol table ELF sections in '%s'\n",
> > -                                binary_path);
> > -                       continue;
> > -               }
> > -               if (!gelf_getshdr(scn, &sh))
> > -                       continue;
> > -               strtabidx = sh.sh_link;
> > -               symbols = elf_getdata(scn, 0);
> > -               if (!symbols) {
> > -                       pr_warn("elf: failed to get symbols for symtab section in '%s': %s\n",
> > -                               binary_path, elf_errmsg(-1));
> > -                       ret = -LIBBPF_ERRNO__FORMAT;
> > +               ret = elf_sym_iter_new(&iter, elf, binary_path, sh_types[i], STT_FUNC);
> > +               if (ret) {
> > +                       if (ret == -ENOENT)
> > +                               continue;
> >                         goto out;
> 
> another styling nit: let's avoid unnecessary nesting of ifs:
> 
> if (ret == -ENOENT)
>     continue;
> if (ret)
>     goto out;
> 
> simple and clean

ok

> 
> 
> >                 }
> > -               nr_syms = symbols->d_size / sh.sh_entsize;
> > -
> > -               for (idx = 0; idx < nr_syms; idx++) {
> > -                       int curr_bind;
> > -                       GElf_Sym sym;
> > -                       Elf_Scn *sym_scn;
> > -                       GElf_Shdr sym_sh;
> > -
> > -                       if (!gelf_getsym(symbols, idx, &sym))
> > -                               continue;
> > -
> > -                       if (GELF_ST_TYPE(sym.st_info) != STT_FUNC)
> > -                               continue;
> > -
> > -                       sname = elf_strptr(elf, strtabidx, sym.st_name);
> > -                       if (!sname)
> > -                               continue;
> > -
> > -                       curr_bind = GELF_ST_BIND(sym.st_info);
> >
> > +               while ((sym = elf_sym_iter_next(&iter))) {
> >                         /* User can specify func, func@@LIB or func@@LIB_VERSION. */
> > -                       if (strncmp(sname, name, name_len) != 0)
> > +                       if (strncmp(sym->name, name, name_len) != 0)
> >                                 continue;
> >                         /* ...but we don't want a search for "foo" to match 'foo2" also, so any
> >                          * additional characters in sname should be of the form "@@LIB".
> >                          */
> > -                       if (!is_name_qualified && sname[name_len] != '\0' && sname[name_len] != '@')
> > +                       if (!is_name_qualified && sym->name[name_len] != '\0' && sym->name[name_len] != '@')
> >                                 continue;
> >
> > -                       if (ret >= 0) {
> > +                       curr_bind = GELF_ST_BIND(sym->sym.st_info);
> > +
> > +                       if (ret > 0) {
> 
> used to be >=, why the change?

the original code initialized ret with -ENOENT and did not change
its value till this point, so the condition never triggered for
the first loop, but it did for the new code because we now use ret
earlier in:

	ret = elf_sym_iter_new(&iter, elf, binary_path, sh_types[i], STT_FUNC);

also the check makes sense to me only if ret > 0 .. when we find
a duplicate value for symbol

jirka
Andrii Nakryiko July 11, 2023, 4:59 p.m. UTC | #3
On Tue, Jul 11, 2023 at 2:03 AM Jiri Olsa <olsajiri@gmail.com> wrote:
>
> On Thu, Jul 06, 2023 at 04:24:48PM -0700, Andrii Nakryiko wrote:
>
> SNIP
>
> > > Suggested-by: Andrii Nakryiko <andrii@kernel.org>
> > > Signed-off-by: Jiri Olsa <jolsa@kernel.org>
> > > ---
> > >  tools/lib/bpf/elf.c | 178 +++++++++++++++++++++++++++++---------------
> > >  1 file changed, 117 insertions(+), 61 deletions(-)
> > >
> >
> > A bunch of nits, but overall looks good. Please address nits, and add my ack
> >
> > Acked-by: Andrii Nakryiko <andrii@kernel.org>
> >
> > > diff --git a/tools/lib/bpf/elf.c b/tools/lib/bpf/elf.c
> > > index 74e35071d22e..fcce4bd2478f 100644
> > > --- a/tools/lib/bpf/elf.c
> > > +++ b/tools/lib/bpf/elf.c
> > > @@ -59,6 +59,108 @@ static Elf_Scn *elf_find_next_scn_by_type(Elf *elf, int sh_type, Elf_Scn *scn)
> > >         return NULL;
> > >  }
> > >
> > > +struct elf_sym {
> > > +       const char *name;
> > > +       GElf_Sym sym;
> > > +       GElf_Shdr sh;
> > > +};
> > > +
> >
> > if we want to use elf_sym_iter outside of elf.c, this should be in
> > libbpf_internal.h?
>
> yes eventually, but all the helper functions using elf_sym_iter that
> I added later are in elf.c, so there's no need atm
>
> SNIP
>
> > > +
> > > +static struct elf_sym *elf_sym_iter_next(struct elf_sym_iter *iter)
> > > +{
> > > +       struct elf_sym *ret = &iter->sym;
> > > +       GElf_Sym *sym = &ret->sym;
> > > +       const char *name = NULL;
> > > +       Elf_Scn *sym_scn;
> > > +       size_t idx;
> > > +
> > > +       for (idx = iter->next_sym_idx; idx < iter->nr_syms; idx++) {
> > > +               if (!gelf_getsym(iter->syms, idx, sym))
> > > +                       continue;
> > > +               if (GELF_ST_TYPE(sym->st_info) != iter->st_type)
> > > +                       continue;
> > > +               name = elf_strptr(iter->elf, iter->strtabidx, sym->st_name);
> > > +               if (!name)
> > > +                       continue;
> > > +
> > > +               /* Transform symbol's virtual address (absolute for
> > > +                * binaries and relative for shared libs) into file
> > > +                * offset, which is what kernel is expecting for
> > > +                * uprobe/uretprobe attachment.
> > > +                * See Documentation/trace/uprobetracer.rst for more
> > > +                * details.
> > > +                * This is done by looking up symbol's containing
> > > +                * section's header and using iter's virtual address
> > > +                * (sh_addr) and corresponding file offset (sh_offset)
> > > +                * to transform sym.st_value (virtual address) into
> > > +                * desired final file offset.
> > > +                */
> >
> > this comment is misplaced? we don't do the translation here
>
> right, should be placed at the elf_sym_offset function
>
> >
> > > +               sym_scn = elf_getscn(iter->elf, sym->st_shndx);
> > > +               if (!sym_scn)
> > > +                       continue;
> > > +               if (!gelf_getshdr(sym_scn, &ret->sh))
> > > +                       continue;
> > > +
> > > +               iter->next_sym_idx = idx + 1;
> > > +               ret->name = name;
> > > +               return ret;
> > > +       }
> > > +
> > > +       return NULL;
> > > +}
> > > +
> > > +static unsigned long elf_sym_offset(struct elf_sym *sym)
> > > +{
> > > +       return sym->sym.st_value - sym->sh.sh_addr + sym->sh.sh_offset;
> > > +}
> > > +
> > >  /* Find offset of function name in the provided ELF object. "binary_path" is
> > >   * the path to the ELF binary represented by "elf", and only used for error
> > >   * reporting matters. "name" matches symbol name or name@@LIB for library
> > > @@ -90,64 +192,36 @@ long elf_find_func_offset(Elf *elf, const char *binary_path, const char *name)
> > >          * reported as a warning/error.
> > >          */
> > >         for (i = 0; i < ARRAY_SIZE(sh_types); i++) {
> > > -               size_t nr_syms, strtabidx, idx;
> > > -               Elf_Data *symbols = NULL;
> > > -               Elf_Scn *scn = NULL;
> > > +               struct elf_sym_iter iter;
> > > +               struct elf_sym *sym;
> > >                 int last_bind = -1;
> > > -               const char *sname;
> > > -               GElf_Shdr sh;
> > > +               int curr_bind;
> >
> > OCD nit:
> >
> > $ rg 'curr(_|\b)' | wc -l
> > 8
> > $ rg 'cur(_|\b)' | wc -l
> > 148
> >
> > and those 8 I consider an unfortunate accident ;) let's standardize on
> > using "cur" consistently
>
> ok.. probably easier to make that change than treat ocd ;-)
>

thanks :)


> >
> > >
> > > -               scn = elf_find_next_scn_by_type(elf, sh_types[i], NULL);
> > > -               if (!scn) {
> > > -                       pr_debug("elf: failed to find symbol table ELF sections in '%s'\n",
> > > -                                binary_path);
> > > -                       continue;
> > > -               }
> > > -               if (!gelf_getshdr(scn, &sh))
> > > -                       continue;
> > > -               strtabidx = sh.sh_link;
> > > -               symbols = elf_getdata(scn, 0);
> > > -               if (!symbols) {
> > > -                       pr_warn("elf: failed to get symbols for symtab section in '%s': %s\n",
> > > -                               binary_path, elf_errmsg(-1));
> > > -                       ret = -LIBBPF_ERRNO__FORMAT;
> > > +               ret = elf_sym_iter_new(&iter, elf, binary_path, sh_types[i], STT_FUNC);
> > > +               if (ret) {
> > > +                       if (ret == -ENOENT)
> > > +                               continue;
> > >                         goto out;
> >
> > another styling nit: let's avoid unnecessary nesting of ifs:
> >
> > if (ret == -ENOENT)
> >     continue;
> > if (ret)
> >     goto out;
> >
> > simple and clean
>
> ok
>
> >
> >
> > >                 }
> > > -               nr_syms = symbols->d_size / sh.sh_entsize;
> > > -
> > > -               for (idx = 0; idx < nr_syms; idx++) {
> > > -                       int curr_bind;
> > > -                       GElf_Sym sym;
> > > -                       Elf_Scn *sym_scn;
> > > -                       GElf_Shdr sym_sh;
> > > -
> > > -                       if (!gelf_getsym(symbols, idx, &sym))
> > > -                               continue;
> > > -
> > > -                       if (GELF_ST_TYPE(sym.st_info) != STT_FUNC)
> > > -                               continue;
> > > -
> > > -                       sname = elf_strptr(elf, strtabidx, sym.st_name);
> > > -                       if (!sname)
> > > -                               continue;
> > > -
> > > -                       curr_bind = GELF_ST_BIND(sym.st_info);
> > >
> > > +               while ((sym = elf_sym_iter_next(&iter))) {
> > >                         /* User can specify func, func@@LIB or func@@LIB_VERSION. */
> > > -                       if (strncmp(sname, name, name_len) != 0)
> > > +                       if (strncmp(sym->name, name, name_len) != 0)
> > >                                 continue;
> > >                         /* ...but we don't want a search for "foo" to match 'foo2" also, so any
> > >                          * additional characters in sname should be of the form "@@LIB".
> > >                          */
> > > -                       if (!is_name_qualified && sname[name_len] != '\0' && sname[name_len] != '@')
> > > +                       if (!is_name_qualified && sym->name[name_len] != '\0' && sym->name[name_len] != '@')
> > >                                 continue;
> > >
> > > -                       if (ret >= 0) {
> > > +                       curr_bind = GELF_ST_BIND(sym->sym.st_info);
> > > +
> > > +                       if (ret > 0) {
> >
> > used to be >=, why the change?
>
> the original code initialized ret with -ENOENT and did not change
> its value till this point, so the condition never triggered for
> the first loop, but it did for the new code because we now use ret
> earlier in:
>
>         ret = elf_sym_iter_new(&iter, elf, binary_path, sh_types[i], STT_FUNC);
>
> also the check makes sense to me only if ret > 0 .. when we find
> a duplicate value for symbol

there is a subtle difference if we have some unresolved func with addr
0 or something. But you are right, it's probably not very sensical, we
can keep it as > 0.

>
> jirka
diff mbox series

Patch

diff --git a/tools/lib/bpf/elf.c b/tools/lib/bpf/elf.c
index 74e35071d22e..fcce4bd2478f 100644
--- a/tools/lib/bpf/elf.c
+++ b/tools/lib/bpf/elf.c
@@ -59,6 +59,108 @@  static Elf_Scn *elf_find_next_scn_by_type(Elf *elf, int sh_type, Elf_Scn *scn)
 	return NULL;
 }
 
+struct elf_sym {
+	const char *name;
+	GElf_Sym sym;
+	GElf_Shdr sh;
+};
+
+struct elf_sym_iter {
+	Elf *elf;
+	Elf_Data *syms;
+	size_t nr_syms;
+	size_t strtabidx;
+	size_t next_sym_idx;
+	struct elf_sym sym;
+	int st_type;
+};
+
+static int elf_sym_iter_new(struct elf_sym_iter *iter,
+			    Elf *elf, const char *binary_path,
+			    int sh_type, int st_type)
+{
+	Elf_Scn *scn = NULL;
+	GElf_Ehdr ehdr;
+	GElf_Shdr sh;
+
+	memset(iter, 0, sizeof(*iter));
+
+	if (!gelf_getehdr(elf, &ehdr)) {
+		pr_warn("elf: failed to get ehdr from %s: %s\n", binary_path, elf_errmsg(-1));
+		return -EINVAL;
+	}
+
+	scn = elf_find_next_scn_by_type(elf, sh_type, NULL);
+	if (!scn) {
+		pr_debug("elf: failed to find symbol table ELF sections in '%s'\n",
+			 binary_path);
+		return -ENOENT;
+	}
+
+	if (!gelf_getshdr(scn, &sh))
+		return -EINVAL;
+
+	iter->strtabidx = sh.sh_link;
+	iter->syms = elf_getdata(scn, 0);
+	if (!iter->syms) {
+		pr_warn("elf: failed to get symbols for symtab section in '%s': %s\n",
+			binary_path, elf_errmsg(-1));
+		return -EINVAL;
+	}
+	iter->nr_syms = iter->syms->d_size / sh.sh_entsize;
+	iter->elf = elf;
+	iter->st_type = st_type;
+	return 0;
+}
+
+static struct elf_sym *elf_sym_iter_next(struct elf_sym_iter *iter)
+{
+	struct elf_sym *ret = &iter->sym;
+	GElf_Sym *sym = &ret->sym;
+	const char *name = NULL;
+	Elf_Scn *sym_scn;
+	size_t idx;
+
+	for (idx = iter->next_sym_idx; idx < iter->nr_syms; idx++) {
+		if (!gelf_getsym(iter->syms, idx, sym))
+			continue;
+		if (GELF_ST_TYPE(sym->st_info) != iter->st_type)
+			continue;
+		name = elf_strptr(iter->elf, iter->strtabidx, sym->st_name);
+		if (!name)
+			continue;
+
+		/* Transform symbol's virtual address (absolute for
+		 * binaries and relative for shared libs) into file
+		 * offset, which is what kernel is expecting for
+		 * uprobe/uretprobe attachment.
+		 * See Documentation/trace/uprobetracer.rst for more
+		 * details.
+		 * This is done by looking up symbol's containing
+		 * section's header and using iter's virtual address
+		 * (sh_addr) and corresponding file offset (sh_offset)
+		 * to transform sym.st_value (virtual address) into
+		 * desired final file offset.
+		 */
+		sym_scn = elf_getscn(iter->elf, sym->st_shndx);
+		if (!sym_scn)
+			continue;
+		if (!gelf_getshdr(sym_scn, &ret->sh))
+			continue;
+
+		iter->next_sym_idx = idx + 1;
+		ret->name = name;
+		return ret;
+	}
+
+	return NULL;
+}
+
+static unsigned long elf_sym_offset(struct elf_sym *sym)
+{
+	return sym->sym.st_value - sym->sh.sh_addr + sym->sh.sh_offset;
+}
+
 /* Find offset of function name in the provided ELF object. "binary_path" is
  * the path to the ELF binary represented by "elf", and only used for error
  * reporting matters. "name" matches symbol name or name@@LIB for library
@@ -90,64 +192,36 @@  long elf_find_func_offset(Elf *elf, const char *binary_path, const char *name)
 	 * reported as a warning/error.
 	 */
 	for (i = 0; i < ARRAY_SIZE(sh_types); i++) {
-		size_t nr_syms, strtabidx, idx;
-		Elf_Data *symbols = NULL;
-		Elf_Scn *scn = NULL;
+		struct elf_sym_iter iter;
+		struct elf_sym *sym;
 		int last_bind = -1;
-		const char *sname;
-		GElf_Shdr sh;
+		int curr_bind;
 
-		scn = elf_find_next_scn_by_type(elf, sh_types[i], NULL);
-		if (!scn) {
-			pr_debug("elf: failed to find symbol table ELF sections in '%s'\n",
-				 binary_path);
-			continue;
-		}
-		if (!gelf_getshdr(scn, &sh))
-			continue;
-		strtabidx = sh.sh_link;
-		symbols = elf_getdata(scn, 0);
-		if (!symbols) {
-			pr_warn("elf: failed to get symbols for symtab section in '%s': %s\n",
-				binary_path, elf_errmsg(-1));
-			ret = -LIBBPF_ERRNO__FORMAT;
+		ret = elf_sym_iter_new(&iter, elf, binary_path, sh_types[i], STT_FUNC);
+		if (ret) {
+			if (ret == -ENOENT)
+				continue;
 			goto out;
 		}
-		nr_syms = symbols->d_size / sh.sh_entsize;
-
-		for (idx = 0; idx < nr_syms; idx++) {
-			int curr_bind;
-			GElf_Sym sym;
-			Elf_Scn *sym_scn;
-			GElf_Shdr sym_sh;
-
-			if (!gelf_getsym(symbols, idx, &sym))
-				continue;
-
-			if (GELF_ST_TYPE(sym.st_info) != STT_FUNC)
-				continue;
-
-			sname = elf_strptr(elf, strtabidx, sym.st_name);
-			if (!sname)
-				continue;
-
-			curr_bind = GELF_ST_BIND(sym.st_info);
 
+		while ((sym = elf_sym_iter_next(&iter))) {
 			/* User can specify func, func@@LIB or func@@LIB_VERSION. */
-			if (strncmp(sname, name, name_len) != 0)
+			if (strncmp(sym->name, name, name_len) != 0)
 				continue;
 			/* ...but we don't want a search for "foo" to match 'foo2" also, so any
 			 * additional characters in sname should be of the form "@@LIB".
 			 */
-			if (!is_name_qualified && sname[name_len] != '\0' && sname[name_len] != '@')
+			if (!is_name_qualified && sym->name[name_len] != '\0' && sym->name[name_len] != '@')
 				continue;
 
-			if (ret >= 0) {
+			curr_bind = GELF_ST_BIND(sym->sym.st_info);
+
+			if (ret > 0) {
 				/* handle multiple matches */
 				if (last_bind != STB_WEAK && curr_bind != STB_WEAK) {
 					/* Only accept one non-weak bind. */
 					pr_warn("elf: ambiguous match for '%s', '%s' in '%s'\n",
-						sname, name, binary_path);
+						sym->name, name, binary_path);
 					ret = -LIBBPF_ERRNO__FORMAT;
 					goto out;
 				} else if (curr_bind == STB_WEAK) {
@@ -158,25 +232,7 @@  long elf_find_func_offset(Elf *elf, const char *binary_path, const char *name)
 				}
 			}
 
-			/* Transform symbol's virtual address (absolute for
-			 * binaries and relative for shared libs) into file
-			 * offset, which is what kernel is expecting for
-			 * uprobe/uretprobe attachment.
-			 * See Documentation/trace/uprobetracer.rst for more
-			 * details.
-			 * This is done by looking up symbol's containing
-			 * section's header and using it's virtual address
-			 * (sh_addr) and corresponding file offset (sh_offset)
-			 * to transform sym.st_value (virtual address) into
-			 * desired final file offset.
-			 */
-			sym_scn = elf_getscn(elf, sym.st_shndx);
-			if (!sym_scn)
-				continue;
-			if (!gelf_getshdr(sym_scn, &sym_sh))
-				continue;
-
-			ret = sym.st_value - sym_sh.sh_addr + sym_sh.sh_offset;
+			ret = elf_sym_offset(sym);
 			last_bind = curr_bind;
 		}
 		if (ret > 0)