diff mbox series

[14/19] libbpf: Add btf__find_by_pattern_kind function

Message ID 20210605111034.1810858-15-jolsa@kernel.org (mailing list archive)
State Changes Requested
Delegated to: BPF
Headers show
Series x86/ftrace/bpf: Add batch support for direct/tracing attach | expand

Checks

Context Check Description
netdev/tree_selection success Not a local patch

Commit Message

Jiri Olsa June 5, 2021, 11:10 a.m. UTC
Adding btf__find_by_pattern_kind function that returns
array of BTF ids for given function name pattern.

Using libc's regex.h support for that.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
---
 tools/lib/bpf/btf.c | 68 +++++++++++++++++++++++++++++++++++++++++++++
 tools/lib/bpf/btf.h |  3 ++
 2 files changed, 71 insertions(+)

Comments

Andrii Nakryiko June 9, 2021, 5:29 a.m. UTC | #1
On Sat, Jun 5, 2021 at 4:14 AM Jiri Olsa <jolsa@kernel.org> wrote:
>
> Adding btf__find_by_pattern_kind function that returns
> array of BTF ids for given function name pattern.
>
> Using libc's regex.h support for that.
>
> Signed-off-by: Jiri Olsa <jolsa@kernel.org>
> ---
>  tools/lib/bpf/btf.c | 68 +++++++++++++++++++++++++++++++++++++++++++++
>  tools/lib/bpf/btf.h |  3 ++
>  2 files changed, 71 insertions(+)
>
> diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
> index b46760b93bb4..421dd6c1e44a 100644
> --- a/tools/lib/bpf/btf.c
> +++ b/tools/lib/bpf/btf.c
> @@ -1,6 +1,7 @@
>  // SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
>  /* Copyright (c) 2018 Facebook */
>
> +#define _GNU_SOURCE
>  #include <byteswap.h>
>  #include <endian.h>
>  #include <stdio.h>
> @@ -16,6 +17,7 @@
>  #include <linux/err.h>
>  #include <linux/btf.h>
>  #include <gelf.h>
> +#include <regex.h>
>  #include "btf.h"
>  #include "bpf.h"
>  #include "libbpf.h"
> @@ -711,6 +713,72 @@ __s32 btf__find_by_name_kind(const struct btf *btf, const char *type_name,
>         return libbpf_err(-ENOENT);
>  }
>
> +static bool is_wildcard(char c)
> +{
> +       static const char *wildchars = "*?[|";
> +
> +       return strchr(wildchars, c);
> +}
> +
> +int btf__find_by_pattern_kind(const struct btf *btf,
> +                             const char *type_pattern, __u32 kind,
> +                             __s32 **__ids)
> +{
> +       __u32 i, nr_types = btf__get_nr_types(btf);
> +       __s32 *ids = NULL;
> +       int cnt = 0, alloc = 0, ret;
> +       regex_t regex;
> +       char *pattern;
> +
> +       if (kind == BTF_KIND_UNKN || !strcmp(type_pattern, "void"))
> +               return 0;
> +
> +       /* When the pattern does not start with wildcard, treat it as
> +        * if we'd want to match it from the beginning of the string.
> +        */

This assumption is absolutely atrocious. If we say it's regexp, then
it has to always be regexp, not something based on some random
heuristic based on the first character.

Taking a step back, though. Do we really need to provide this API? Why
applications can't implement it on their own, given regexp
functionality is provided by libc. Which I didn't know, actually, so
that's pretty nice, assuming that it's also available in more minimal
implementations like musl.

> +       asprintf(&pattern, "%s%s",
> +                is_wildcard(type_pattern[0]) ? "^" : "",
> +                type_pattern);
> +
> +       ret = regcomp(&regex, pattern, REG_EXTENDED);
> +       if (ret) {
> +               pr_warn("failed to compile regex\n");
> +               free(pattern);
> +               return -EINVAL;
> +       }
> +
> +       free(pattern);
> +
> +       for (i = 1; i <= nr_types; i++) {
> +               const struct btf_type *t = btf__type_by_id(btf, i);
> +               const char *name;
> +               __s32 *p;
> +
> +               if (btf_kind(t) != kind)
> +                       continue;
> +               name = btf__name_by_offset(btf, t->name_off);
> +               if (name && regexec(&regex, name, 0, NULL, 0))
> +                       continue;
> +               if (cnt == alloc) {
> +                       alloc = max(100, alloc * 3 / 2);
> +                       p = realloc(ids, alloc * sizeof(__u32));

this memory allocation and re-allocation on behalf of users is another
argument against this API

> +                       if (!p) {
> +                               free(ids);
> +                               regfree(&regex);
> +                               return -ENOMEM;
> +                       }
> +                       ids = p;
> +               }
> +
> +               ids[cnt] = i;
> +               cnt++;
> +       }
> +
> +       regfree(&regex);
> +       *__ids = ids;
> +       return cnt ?: -ENOENT;
> +}
> +
>  static bool btf_is_modifiable(const struct btf *btf)
>  {
>         return (void *)btf->hdr != btf->raw_data;
> diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
> index b54f1c3ebd57..036857aded94 100644
> --- a/tools/lib/bpf/btf.h
> +++ b/tools/lib/bpf/btf.h
> @@ -371,6 +371,9 @@ btf_var_secinfos(const struct btf_type *t)
>         return (struct btf_var_secinfo *)(t + 1);
>  }
>
> +int btf__find_by_pattern_kind(const struct btf *btf,
> +                             const char *type_pattern, __u32 kind,
> +                             __s32 **__ids);
>  #ifdef __cplusplus
>  } /* extern "C" */
>  #endif
> --
> 2.31.1
>
Jiri Olsa June 9, 2021, 1:59 p.m. UTC | #2
On Tue, Jun 08, 2021 at 10:29:19PM -0700, Andrii Nakryiko wrote:
> On Sat, Jun 5, 2021 at 4:14 AM Jiri Olsa <jolsa@kernel.org> wrote:
> >
> > Adding btf__find_by_pattern_kind function that returns
> > array of BTF ids for given function name pattern.
> >
> > Using libc's regex.h support for that.
> >
> > Signed-off-by: Jiri Olsa <jolsa@kernel.org>
> > ---
> >  tools/lib/bpf/btf.c | 68 +++++++++++++++++++++++++++++++++++++++++++++
> >  tools/lib/bpf/btf.h |  3 ++
> >  2 files changed, 71 insertions(+)
> >
> > diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
> > index b46760b93bb4..421dd6c1e44a 100644
> > --- a/tools/lib/bpf/btf.c
> > +++ b/tools/lib/bpf/btf.c
> > @@ -1,6 +1,7 @@
> >  // SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
> >  /* Copyright (c) 2018 Facebook */
> >
> > +#define _GNU_SOURCE
> >  #include <byteswap.h>
> >  #include <endian.h>
> >  #include <stdio.h>
> > @@ -16,6 +17,7 @@
> >  #include <linux/err.h>
> >  #include <linux/btf.h>
> >  #include <gelf.h>
> > +#include <regex.h>
> >  #include "btf.h"
> >  #include "bpf.h"
> >  #include "libbpf.h"
> > @@ -711,6 +713,72 @@ __s32 btf__find_by_name_kind(const struct btf *btf, const char *type_name,
> >         return libbpf_err(-ENOENT);
> >  }
> >
> > +static bool is_wildcard(char c)
> > +{
> > +       static const char *wildchars = "*?[|";
> > +
> > +       return strchr(wildchars, c);
> > +}
> > +
> > +int btf__find_by_pattern_kind(const struct btf *btf,
> > +                             const char *type_pattern, __u32 kind,
> > +                             __s32 **__ids)
> > +{
> > +       __u32 i, nr_types = btf__get_nr_types(btf);
> > +       __s32 *ids = NULL;
> > +       int cnt = 0, alloc = 0, ret;
> > +       regex_t regex;
> > +       char *pattern;
> > +
> > +       if (kind == BTF_KIND_UNKN || !strcmp(type_pattern, "void"))
> > +               return 0;
> > +
> > +       /* When the pattern does not start with wildcard, treat it as
> > +        * if we'd want to match it from the beginning of the string.
> > +        */
> 
> This assumption is absolutely atrocious. If we say it's regexp, then
> it has to always be regexp, not something based on some random
> heuristic based on the first character.
> 
> Taking a step back, though. Do we really need to provide this API? Why
> applications can't implement it on their own, given regexp
> functionality is provided by libc. Which I didn't know, actually, so
> that's pretty nice, assuming that it's also available in more minimal
> implementations like musl.
> 

so the only purpose for this function is to support wildcards in
tests like:

  SEC("fentry.multi/bpf_fentry_test*")

so the generic skeleton attach function can work.. but that can be
removed and the test programs can be attached manually through some
other attach function that will have list of functions as argument

jirka

> > +       asprintf(&pattern, "%s%s",
> > +                is_wildcard(type_pattern[0]) ? "^" : "",
> > +                type_pattern);
> > +
> > +       ret = regcomp(&regex, pattern, REG_EXTENDED);
> > +       if (ret) {
> > +               pr_warn("failed to compile regex\n");
> > +               free(pattern);
> > +               return -EINVAL;
> > +       }
> > +
> > +       free(pattern);
> > +
> > +       for (i = 1; i <= nr_types; i++) {
> > +               const struct btf_type *t = btf__type_by_id(btf, i);
> > +               const char *name;
> > +               __s32 *p;
> > +
> > +               if (btf_kind(t) != kind)
> > +                       continue;
> > +               name = btf__name_by_offset(btf, t->name_off);
> > +               if (name && regexec(&regex, name, 0, NULL, 0))
> > +                       continue;
> > +               if (cnt == alloc) {
> > +                       alloc = max(100, alloc * 3 / 2);
> > +                       p = realloc(ids, alloc * sizeof(__u32));
> 
> this memory allocation and re-allocation on behalf of users is another
> argument against this API
> 
> > +                       if (!p) {
> > +                               free(ids);
> > +                               regfree(&regex);
> > +                               return -ENOMEM;
> > +                       }
> > +                       ids = p;
> > +               }
> > +
> > +               ids[cnt] = i;
> > +               cnt++;
> > +       }
> > +
> > +       regfree(&regex);
> > +       *__ids = ids;
> > +       return cnt ?: -ENOENT;
> > +}
> > +
> >  static bool btf_is_modifiable(const struct btf *btf)
> >  {
> >         return (void *)btf->hdr != btf->raw_data;
> > diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
> > index b54f1c3ebd57..036857aded94 100644
> > --- a/tools/lib/bpf/btf.h
> > +++ b/tools/lib/bpf/btf.h
> > @@ -371,6 +371,9 @@ btf_var_secinfos(const struct btf_type *t)
> >         return (struct btf_var_secinfo *)(t + 1);
> >  }
> >
> > +int btf__find_by_pattern_kind(const struct btf *btf,
> > +                             const char *type_pattern, __u32 kind,
> > +                             __s32 **__ids);
> >  #ifdef __cplusplus
> >  } /* extern "C" */
> >  #endif
> > --
> > 2.31.1
> >
>
Jiri Olsa June 9, 2021, 2:19 p.m. UTC | #3
On Wed, Jun 09, 2021 at 03:59:47PM +0200, Jiri Olsa wrote:

SNIP

> > > +
> > > +       /* When the pattern does not start with wildcard, treat it as
> > > +        * if we'd want to match it from the beginning of the string.
> > > +        */
> > 
> > This assumption is absolutely atrocious. If we say it's regexp, then
> > it has to always be regexp, not something based on some random
> > heuristic based on the first character.
> > 
> > Taking a step back, though. Do we really need to provide this API? Why
> > applications can't implement it on their own, given regexp
> > functionality is provided by libc. Which I didn't know, actually, so
> > that's pretty nice, assuming that it's also available in more minimal
> > implementations like musl.
> > 
> 
> so the only purpose for this function is to support wildcards in
> tests like:
> 
>   SEC("fentry.multi/bpf_fentry_test*")
> 
> so the generic skeleton attach function can work.. but that can be
> removed and the test programs can be attached manually through some
> other attach function that will have list of functions as argument

nah, no other attach function is needed, we have that support now in
link_create ready to use ;-) sry

jirka
diff mbox series

Patch

diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index b46760b93bb4..421dd6c1e44a 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -1,6 +1,7 @@ 
 // SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
 /* Copyright (c) 2018 Facebook */
 
+#define _GNU_SOURCE
 #include <byteswap.h>
 #include <endian.h>
 #include <stdio.h>
@@ -16,6 +17,7 @@ 
 #include <linux/err.h>
 #include <linux/btf.h>
 #include <gelf.h>
+#include <regex.h>
 #include "btf.h"
 #include "bpf.h"
 #include "libbpf.h"
@@ -711,6 +713,72 @@  __s32 btf__find_by_name_kind(const struct btf *btf, const char *type_name,
 	return libbpf_err(-ENOENT);
 }
 
+static bool is_wildcard(char c)
+{
+	static const char *wildchars = "*?[|";
+
+	return strchr(wildchars, c);
+}
+
+int btf__find_by_pattern_kind(const struct btf *btf,
+			      const char *type_pattern, __u32 kind,
+			      __s32 **__ids)
+{
+	__u32 i, nr_types = btf__get_nr_types(btf);
+	__s32 *ids = NULL;
+	int cnt = 0, alloc = 0, ret;
+	regex_t regex;
+	char *pattern;
+
+	if (kind == BTF_KIND_UNKN || !strcmp(type_pattern, "void"))
+		return 0;
+
+	/* When the pattern does not start with wildcard, treat it as
+	 * if we'd want to match it from the beginning of the string.
+	 */
+	asprintf(&pattern, "%s%s",
+		 is_wildcard(type_pattern[0]) ? "^" : "",
+		 type_pattern);
+
+	ret = regcomp(&regex, pattern, REG_EXTENDED);
+	if (ret) {
+		pr_warn("failed to compile regex\n");
+		free(pattern);
+		return -EINVAL;
+	}
+
+	free(pattern);
+
+	for (i = 1; i <= nr_types; i++) {
+		const struct btf_type *t = btf__type_by_id(btf, i);
+		const char *name;
+		__s32 *p;
+
+		if (btf_kind(t) != kind)
+			continue;
+		name = btf__name_by_offset(btf, t->name_off);
+		if (name && regexec(&regex, name, 0, NULL, 0))
+			continue;
+		if (cnt == alloc) {
+			alloc = max(100, alloc * 3 / 2);
+			p = realloc(ids, alloc * sizeof(__u32));
+			if (!p) {
+				free(ids);
+				regfree(&regex);
+				return -ENOMEM;
+			}
+			ids = p;
+		}
+
+		ids[cnt] = i;
+		cnt++;
+	}
+
+	regfree(&regex);
+	*__ids = ids;
+	return cnt ?: -ENOENT;
+}
+
 static bool btf_is_modifiable(const struct btf *btf)
 {
 	return (void *)btf->hdr != btf->raw_data;
diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
index b54f1c3ebd57..036857aded94 100644
--- a/tools/lib/bpf/btf.h
+++ b/tools/lib/bpf/btf.h
@@ -371,6 +371,9 @@  btf_var_secinfos(const struct btf_type *t)
 	return (struct btf_var_secinfo *)(t + 1);
 }
 
+int btf__find_by_pattern_kind(const struct btf *btf,
+			      const char *type_pattern, __u32 kind,
+			      __s32 **__ids);
 #ifdef __cplusplus
 } /* extern "C" */
 #endif