Message ID | 20230904022444.1695820-3-hengqi.chen@gmail.com (mailing list archive) |
---|---|
State | Superseded |
Delegated to: | BPF |
Headers | show |
Series | libbpf: Support symbol versioning for uprobe | expand |
On Mon, Sep 04, 2023 at 02:24:44AM +0000, Hengqi Chen wrote: > Currently, we allow users to specify symbol name for uprobe in a qualified > form, i.e. func@@LIB or func@@LIB_VERSION. For dynamic symbols, their version > info is actually stored in .gnu.version and .gnu.version_d sections of the ELF > objects. So dynamic symbols and the qualified name won't match. Add support for > symbol versioning ([0]) so that we can handle the above case. > > [0]: https://refspecs.linuxfoundation.org/LSB_3.0.0/LSB-PDA/LSB-PDA.junk/symversion.html > > Signed-off-by: Hengqi Chen <hengqi.chen@gmail.com> > --- > tools/lib/bpf/elf.c | 98 +++++++++++++++++++++++++++++++++++++++++---- > 1 file changed, 90 insertions(+), 8 deletions(-) > > diff --git a/tools/lib/bpf/elf.c b/tools/lib/bpf/elf.c > index 5c9e588b17da..ed3d9880eaa4 100644 > --- a/tools/lib/bpf/elf.c > +++ b/tools/lib/bpf/elf.c > @@ -9,6 +9,7 @@ > #include "str_error.h" > > #define STRERR_BUFSIZE 128 > +#define HIDDEN_BIT 16 hum, the docs says it's bit 15 ? SNIP > @@ -138,12 +155,57 @@ static struct elf_sym *elf_sym_iter_next(struct elf_sym_iter *iter) > > iter->next_sym_idx = idx + 1; > ret->name = name; > + ret->ver = 0; > + ret->hidden = false; > + > + if (iter->versyms) { > + if (!gelf_getversym(iter->versyms, idx, &versym)) > + continue; > + ret->ver = versym & ~(1 << HIDDEN_BIT); > + ret->hidden = versym & (1 << HIDDEN_BIT); > + } > return ret; > } > > return NULL; > } > > +static const char *elf_get_vername(Elf *elf, int ver) > +{ > + GElf_Verdaux verdaux; > + GElf_Verdef verdef; > + Elf_Data *verdefs; > + size_t strtabidx; > + GElf_Shdr sh; > + Elf_Scn *scn; > + int offset; > + > + scn = elf_find_next_scn_by_type(elf, SHT_GNU_verdef, NULL); > + if (!scn) > + return NULL; > + if (!gelf_getshdr(scn, &sh)) > + return NULL; > + strtabidx = sh.sh_link; > + verdefs = elf_getdata(scn, 0); should we read verdefs same as you did for versyms in elf_sym_iter_new, so you don't need to read that every time? > + > + offset = 0; > + while (gelf_getverdef(verdefs, offset, &verdef)) { > + if (verdef.vd_ndx != ver) { > + if (!verdef.vd_next) > + break; > + > + offset += verdef.vd_next; > + continue; > + } > + > + if (!gelf_getverdaux(verdefs, offset + verdef.vd_aux, &verdaux)) > + break; > + > + return elf_strptr(elf, strtabidx, verdaux.vda_name); > + > + } > + return NULL; > +} > > /* Transform symbol's virtual address (absolute for binaries and relative > * for shared libs) into file offset, which is what kernel is expecting > @@ -191,6 +253,9 @@ long elf_find_func_offset(Elf *elf, const char *binary_path, const char *name) > for (i = 0; i < ARRAY_SIZE(sh_types); i++) { > struct elf_sym_iter iter; > struct elf_sym *sym; > + size_t sname_len; > + char sname[256]; is this enough? not sure if there's symbol max size, maybe we could also use asprintf below > + const char *ver; > int last_bind = -1; > int cur_bind; > > @@ -201,14 +266,31 @@ long elf_find_func_offset(Elf *elf, const char *binary_path, const char *name) > goto out; > > while ((sym = elf_sym_iter_next(&iter))) { > - /* User can specify func, func@@LIB or func@@LIB_VERSION. */ > - if (strncmp(sym->name, name, name_len) != 0) > - continue; > - /* ...but we don't want a search for "foo" to match 'foo2" also, so any > - * additional characters in sname should be of the form "@@LIB". > - */ > - if (!is_name_qualified && sym->name[name_len] != '\0' && sym->name[name_len] != '@') > - continue; > + if (sh_types[i] == SHT_DYNSYM && is_name_qualified) { > + if (sym->hidden) > + continue; > + > + sname_len = strlen(sym->name); > + if (strncmp(sym->name, name, sname_len) != 0) > + continue; > + > + ver = elf_get_vername(elf, sym->ver); > + if (!ver) > + continue; > + > + snprintf(sname, sizeof(sname), "%s@@%s", sym->name, ver); > + if (strncmp(sname, name, name_len) != 0) > + continue; > + } else { > + /* User can specify func, func@@LIB or func@@LIB_VERSION. */ > + if (strncmp(sym->name, name, name_len) != 0) > + continue; > + /* ...but we don't want a search for "foo" to match 'foo2" also, so any > + * additional characters in sname should be of the form "@@LIB". > + */ > + if (!is_name_qualified && sym->name[name_len] != '\0' && sym->name[name_len] != '@') > + continue; hum, I never checked the versioned symbols, but it looks like we don't get symbols in 'symbol@version' form, so I wonder how that worked before would be great to have a selftest for that also I had to add change below to test that through prog's section, I think we need allow '@' in there thanks, jirka --- diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 96ff1aa4bf6a..a30f3c48f891 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -11512,8 +11512,11 @@ static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf *link = NULL; - n = sscanf(prog->sec_name, "%m[^/]/%m[^:]:%m[a-zA-Z0-9_.]+%li", + n = sscanf(prog->sec_name, "%m[^/]/%m[^:]:%m[a-zA-Z0-9_.@]+%li", &probe_type, &binary_path, &func_name, &offset);
Hi, Jiri: On 9/4/23 22:10, Jiri Olsa wrote: > On Mon, Sep 04, 2023 at 02:24:44AM +0000, Hengqi Chen wrote: >> Currently, we allow users to specify symbol name for uprobe in a qualified >> form, i.e. func@@LIB or func@@LIB_VERSION. For dynamic symbols, their version >> info is actually stored in .gnu.version and .gnu.version_d sections of the ELF >> objects. So dynamic symbols and the qualified name won't match. Add support for >> symbol versioning ([0]) so that we can handle the above case. >> >> [0]: https://refspecs.linuxfoundation.org/LSB_3.0.0/LSB-PDA/LSB-PDA.junk/symversion.html >> >> Signed-off-by: Hengqi Chen <hengqi.chen@gmail.com> >> --- >> tools/lib/bpf/elf.c | 98 +++++++++++++++++++++++++++++++++++++++++---- >> 1 file changed, 90 insertions(+), 8 deletions(-) >> >> diff --git a/tools/lib/bpf/elf.c b/tools/lib/bpf/elf.c >> index 5c9e588b17da..ed3d9880eaa4 100644 >> --- a/tools/lib/bpf/elf.c >> +++ b/tools/lib/bpf/elf.c >> @@ -9,6 +9,7 @@ >> #include "str_error.h" >> >> #define STRERR_BUFSIZE 128 >> +#define HIDDEN_BIT 16 > > hum, the docs says it's bit 15 ? Ahh, right, should be 15. > > SNIP > >> @@ -138,12 +155,57 @@ static struct elf_sym *elf_sym_iter_next(struct elf_sym_iter *iter) >> >> iter->next_sym_idx = idx + 1; >> ret->name = name; >> + ret->ver = 0; >> + ret->hidden = false; >> + >> + if (iter->versyms) { >> + if (!gelf_getversym(iter->versyms, idx, &versym)) >> + continue; >> + ret->ver = versym & ~(1 << HIDDEN_BIT); >> + ret->hidden = versym & (1 << HIDDEN_BIT); >> + } >> return ret; >> } >> >> return NULL; >> } >> >> +static const char *elf_get_vername(Elf *elf, int ver) >> +{ >> + GElf_Verdaux verdaux; >> + GElf_Verdef verdef; >> + Elf_Data *verdefs; >> + size_t strtabidx; >> + GElf_Shdr sh; >> + Elf_Scn *scn; >> + int offset; >> + >> + scn = elf_find_next_scn_by_type(elf, SHT_GNU_verdef, NULL); >> + if (!scn) >> + return NULL; >> + if (!gelf_getshdr(scn, &sh)) >> + return NULL; >> + strtabidx = sh.sh_link; >> + verdefs = elf_getdata(scn, 0); > > should we read verdefs same as you did for versyms in elf_sym_iter_new, > so you don't need to read that every time? > It looks weird to retrieve version from elf_sym_iter, and we should not reach here too many times. >> + >> + offset = 0; >> + while (gelf_getverdef(verdefs, offset, &verdef)) { >> + if (verdef.vd_ndx != ver) { >> + if (!verdef.vd_next) >> + break; >> + >> + offset += verdef.vd_next; >> + continue; >> + } >> + >> + if (!gelf_getverdaux(verdefs, offset + verdef.vd_aux, &verdaux)) >> + break; >> + >> + return elf_strptr(elf, strtabidx, verdaux.vda_name); >> + >> + } >> + return NULL; >> +} >> >> /* Transform symbol's virtual address (absolute for binaries and relative >> * for shared libs) into file offset, which is what kernel is expecting >> @@ -191,6 +253,9 @@ long elf_find_func_offset(Elf *elf, const char *binary_path, const char *name) >> for (i = 0; i < ARRAY_SIZE(sh_types); i++) { >> struct elf_sym_iter iter; >> struct elf_sym *sym; >> + size_t sname_len; >> + char sname[256]; > > is this enough? not sure if there's symbol max size, > maybe we could also use asprintf below > OK, will use asprintf instead. >> + const char *ver; >> int last_bind = -1; >> int cur_bind; >> >> @@ -201,14 +266,31 @@ long elf_find_func_offset(Elf *elf, const char *binary_path, const char *name) >> goto out; >> >> while ((sym = elf_sym_iter_next(&iter))) { >> - /* User can specify func, func@@LIB or func@@LIB_VERSION. */ >> - if (strncmp(sym->name, name, name_len) != 0) >> - continue; >> - /* ...but we don't want a search for "foo" to match 'foo2" also, so any >> - * additional characters in sname should be of the form "@@LIB". >> - */ >> - if (!is_name_qualified && sym->name[name_len] != '\0' && sym->name[name_len] != '@') >> - continue; >> + if (sh_types[i] == SHT_DYNSYM && is_name_qualified) { >> + if (sym->hidden) >> + continue; >> + >> + sname_len = strlen(sym->name); >> + if (strncmp(sym->name, name, sname_len) != 0) >> + continue; >> + >> + ver = elf_get_vername(elf, sym->ver); >> + if (!ver) >> + continue; >> + >> + snprintf(sname, sizeof(sname), "%s@@%s", sym->name, ver); >> + if (strncmp(sname, name, name_len) != 0) >> + continue; >> + } else { >> + /* User can specify func, func@@LIB or func@@LIB_VERSION. */ >> + if (strncmp(sym->name, name, name_len) != 0) >> + continue; >> + /* ...but we don't want a search for "foo" to match 'foo2" also, so any >> + * additional characters in sname should be of the form "@@LIB". >> + */ >> + if (!is_name_qualified && sym->name[name_len] != '\0' && sym->name[name_len] != '@') >> + continue; > > hum, I never checked the versioned symbols, but it looks like we > don't get symbols in 'symbol@version' form, so I wonder how that > worked before > > would be great to have a selftest for that > > also I had to add change below to test that through prog's section, > I think we need allow '@' in there > Let me try. > thanks, > jirka > > > --- > diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c > index 96ff1aa4bf6a..a30f3c48f891 100644 > --- a/tools/lib/bpf/libbpf.c > +++ b/tools/lib/bpf/libbpf.c > @@ -11512,8 +11512,11 @@ static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf > > *link = NULL; > > - n = sscanf(prog->sec_name, "%m[^/]/%m[^:]:%m[a-zA-Z0-9_.]+%li", > + n = sscanf(prog->sec_name, "%m[^/]/%m[^:]:%m[a-zA-Z0-9_.@]+%li", > &probe_type, &binary_path, &func_name, &offset); --- Hengqi
diff --git a/tools/lib/bpf/elf.c b/tools/lib/bpf/elf.c index 5c9e588b17da..ed3d9880eaa4 100644 --- a/tools/lib/bpf/elf.c +++ b/tools/lib/bpf/elf.c @@ -9,6 +9,7 @@ #include "str_error.h" #define STRERR_BUFSIZE 128 +#define HIDDEN_BIT 16 int elf_open(const char *binary_path, struct elf_fd *elf_fd) { @@ -64,11 +65,14 @@ struct elf_sym { const char *name; GElf_Sym sym; GElf_Shdr sh; + int ver; + bool hidden; }; struct elf_sym_iter { Elf *elf; Elf_Data *syms; + Elf_Data *versyms; size_t nr_syms; size_t strtabidx; size_t next_sym_idx; @@ -111,6 +115,18 @@ static int elf_sym_iter_new(struct elf_sym_iter *iter, iter->nr_syms = iter->syms->d_size / sh.sh_entsize; iter->elf = elf; iter->st_type = st_type; + + /* Version symbol table only meaningful to dynsym only */ + if (sh_type != SHT_DYNSYM) + return 0; + + scn = elf_find_next_scn_by_type(elf, SHT_GNU_versym, NULL); + if (!scn) + return 0; + if (!gelf_getshdr(scn, &sh)) + return -EINVAL; + iter->versyms = elf_getdata(scn, 0); + return 0; } @@ -119,6 +135,7 @@ static struct elf_sym *elf_sym_iter_next(struct elf_sym_iter *iter) struct elf_sym *ret = &iter->sym; GElf_Sym *sym = &ret->sym; const char *name = NULL; + GElf_Versym versym; Elf_Scn *sym_scn; size_t idx; @@ -138,12 +155,57 @@ static struct elf_sym *elf_sym_iter_next(struct elf_sym_iter *iter) iter->next_sym_idx = idx + 1; ret->name = name; + ret->ver = 0; + ret->hidden = false; + + if (iter->versyms) { + if (!gelf_getversym(iter->versyms, idx, &versym)) + continue; + ret->ver = versym & ~(1 << HIDDEN_BIT); + ret->hidden = versym & (1 << HIDDEN_BIT); + } return ret; } return NULL; } +static const char *elf_get_vername(Elf *elf, int ver) +{ + GElf_Verdaux verdaux; + GElf_Verdef verdef; + Elf_Data *verdefs; + size_t strtabidx; + GElf_Shdr sh; + Elf_Scn *scn; + int offset; + + scn = elf_find_next_scn_by_type(elf, SHT_GNU_verdef, NULL); + if (!scn) + return NULL; + if (!gelf_getshdr(scn, &sh)) + return NULL; + strtabidx = sh.sh_link; + verdefs = elf_getdata(scn, 0); + + offset = 0; + while (gelf_getverdef(verdefs, offset, &verdef)) { + if (verdef.vd_ndx != ver) { + if (!verdef.vd_next) + break; + + offset += verdef.vd_next; + continue; + } + + if (!gelf_getverdaux(verdefs, offset + verdef.vd_aux, &verdaux)) + break; + + return elf_strptr(elf, strtabidx, verdaux.vda_name); + + } + return NULL; +} /* Transform symbol's virtual address (absolute for binaries and relative * for shared libs) into file offset, which is what kernel is expecting @@ -191,6 +253,9 @@ long elf_find_func_offset(Elf *elf, const char *binary_path, const char *name) for (i = 0; i < ARRAY_SIZE(sh_types); i++) { struct elf_sym_iter iter; struct elf_sym *sym; + size_t sname_len; + char sname[256]; + const char *ver; int last_bind = -1; int cur_bind; @@ -201,14 +266,31 @@ long elf_find_func_offset(Elf *elf, const char *binary_path, const char *name) goto out; while ((sym = elf_sym_iter_next(&iter))) { - /* User can specify func, func@@LIB or func@@LIB_VERSION. */ - if (strncmp(sym->name, name, name_len) != 0) - continue; - /* ...but we don't want a search for "foo" to match 'foo2" also, so any - * additional characters in sname should be of the form "@@LIB". - */ - if (!is_name_qualified && sym->name[name_len] != '\0' && sym->name[name_len] != '@') - continue; + if (sh_types[i] == SHT_DYNSYM && is_name_qualified) { + if (sym->hidden) + continue; + + sname_len = strlen(sym->name); + if (strncmp(sym->name, name, sname_len) != 0) + continue; + + ver = elf_get_vername(elf, sym->ver); + if (!ver) + continue; + + snprintf(sname, sizeof(sname), "%s@@%s", sym->name, ver); + if (strncmp(sname, name, name_len) != 0) + continue; + } else { + /* User can specify func, func@@LIB or func@@LIB_VERSION. */ + if (strncmp(sym->name, name, name_len) != 0) + continue; + /* ...but we don't want a search for "foo" to match 'foo2" also, so any + * additional characters in sname should be of the form "@@LIB". + */ + if (!is_name_qualified && sym->name[name_len] != '\0' && sym->name[name_len] != '@') + continue; + } cur_bind = GELF_ST_BIND(sym->sym.st_info);
Currently, we allow users to specify symbol name for uprobe in a qualified form, i.e. func@@LIB or func@@LIB_VERSION. For dynamic symbols, their version info is actually stored in .gnu.version and .gnu.version_d sections of the ELF objects. So dynamic symbols and the qualified name won't match. Add support for symbol versioning ([0]) so that we can handle the above case. [0]: https://refspecs.linuxfoundation.org/LSB_3.0.0/LSB-PDA/LSB-PDA.junk/symversion.html Signed-off-by: Hengqi Chen <hengqi.chen@gmail.com> --- tools/lib/bpf/elf.c | 98 +++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 90 insertions(+), 8 deletions(-) -- 2.39.3