diff mbox series

[bpf-next,1/2] libbpf: improve string handling for uprobe name-based attach

Message ID 1649195156-9465-2-git-send-email-alan.maguire@oracle.com (mailing list archive)
State Superseded
Delegated to: BPF
Headers show
Series libbpf: uprobe name-based attach followups | expand

Checks

Context Check Description
bpf/vmtest-bpf-next-VM_Test-1 success Logs for Kernel LATEST on ubuntu-latest + selftests
bpf/vmtest-bpf-next-PR success PR summary
bpf/vmtest-bpf-next-VM_Test-2 success Logs for Kernel LATEST on z15 + selftests
netdev/tree_selection success Clearly marked for bpf-next, async
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix success Link
netdev/cover_letter success Series has a cover letter
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 0 this patch: 0
netdev/cc_maintainers success CCed 10 of 10 maintainers
netdev/build_clang success Errors and warnings before: 0 this patch: 0
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 0 this patch: 0
netdev/checkpatch warning WARNING: line length of 94 exceeds 80 columns WARNING: line length of 97 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Alan Maguire April 5, 2022, 9:45 p.m. UTC
For uprobe attach, libraries are identified by matching a ".so"
substring in the binary path.  This matches a lot of patterns that do
not conform to library .so[.version] suffixes, so instead match a ".so"
_suffix_, and if that fails match a ".so." substring for the versioned
library case.

For uprobe auto-attach, the parsing can be simplified for the SEC()
name to a single sscanf(); the return value of the sscanf can then
be used to distinguish between sections that simply specify
"u[ret]probe" (and thus cannot auto-attach), those that specify
"u[ret]probe/binary_path:function+offset" etc.

Suggested-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alan Maguire <alan.maguire@oracle.com>
---
 tools/lib/bpf/libbpf.c          | 77 ++++++++++++++++-------------------------
 tools/lib/bpf/libbpf_internal.h |  5 +++
 2 files changed, 35 insertions(+), 47 deletions(-)

Comments

Andrii Nakryiko April 6, 2022, 12:06 a.m. UTC | #1
On Tue, Apr 5, 2022 at 2:46 PM Alan Maguire <alan.maguire@oracle.com> wrote:
>
> For uprobe attach, libraries are identified by matching a ".so"
> substring in the binary path.  This matches a lot of patterns that do
> not conform to library .so[.version] suffixes, so instead match a ".so"
> _suffix_, and if that fails match a ".so." substring for the versioned
> library case.
>

You are making two separate changes in one patch, let's split them.

> For uprobe auto-attach, the parsing can be simplified for the SEC()
> name to a single ssscanf(); the return value of the sscanf can then

too many sss :)

> be used to distinguish between sections that simply specify
> "u[ret]probe" (and thus cannot auto-attach), those that specify
> "u[ret]probe/binary_path:function+offset" etc.
>
> Suggested-by: Andrii Nakryiko <andrii@kernel.org>
> Signed-off-by: Alan Maguire <alan.maguire@oracle.com>
> ---
>  tools/lib/bpf/libbpf.c          | 77 ++++++++++++++++-------------------------
>  tools/lib/bpf/libbpf_internal.h |  5 +++
>  2 files changed, 35 insertions(+), 47 deletions(-)
>
> diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
> index 91ce94b..3f23e88 100644
> --- a/tools/lib/bpf/libbpf.c
> +++ b/tools/lib/bpf/libbpf.c
> @@ -10750,7 +10750,7 @@ static int resolve_full_path(const char *file, char *result, size_t result_sz)
>         const char *search_paths[3] = {};
>         int i;
>
> -       if (strstr(file, ".so")) {
> +       if (str_has_sfx(file, ".so") || strstr(file, ".so.")) {
>                 search_paths[0] = getenv("LD_LIBRARY_PATH");
>                 search_paths[1] = "/usr/lib64:/usr/lib";
>                 search_paths[2] = arch_specific_lib_paths();
> @@ -10897,60 +10897,43 @@ static int resolve_full_path(const char *file, char *result, size_t result_sz)
>  static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link)
>  {
>         DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts);
> -       char *func, *probe_name, *func_end;
> -       char *func_name, binary_path[512];
> -       unsigned long long raw_offset;
> +       char *probe_type = NULL, *binary_path = NULL, *func_name = NULL;
> +       int n, ret = -EINVAL;
>         size_t offset = 0;
> -       int n;
>
>         *link = NULL;
>
> -       opts.retprobe = str_has_pfx(prog->sec_name, "uretprobe");
> -       if (opts.retprobe)
> -               probe_name = prog->sec_name + sizeof("uretprobe") - 1;
> -       else
> -               probe_name = prog->sec_name + sizeof("uprobe") - 1;
> -       if (probe_name[0] == '/')
> -               probe_name++;
> -
> -       /* handle SEC("u[ret]probe") - format is valid, but auto-attach is impossible. */
> -       if (strlen(probe_name) == 0)
> -               return 0;
> -
> -       snprintf(binary_path, sizeof(binary_path), "%s", probe_name);
> -       /* ':' should be prior to function+offset */
> -       func_name = strrchr(binary_path, ':');
> -       if (!func_name) {
> +       n = sscanf(prog->sec_name, "%m[^/]/%m[^:]:%m[a-zA-Z0-9_.]+%zu",

note that previously you were using %li for offset which allows
decimal and hexadecimal formats, I think that's convenient, let's
allow that still

> +                  &probe_type, &binary_path, &func_name, &offset);
> +       switch (n) {
> +       case 1:
> +               /* handle SEC("u[ret]probe") - format is valid, but auto-attach is impossible. */
> +               ret = 0;
> +               break;
> +       case 2:
>                 pr_warn("section '%s' missing ':function[+offset]' specification\n",
>                         prog->sec_name);

please use 'prog '%s': ' prefix in these attach_xxx() functions for consistency

> -               return -EINVAL;
> -       }
> -       func_name[0] = '\0';
> -       func_name++;
> -       n = sscanf(func_name, "%m[a-zA-Z0-9_.]+%li", &func, &offset);
> -       if (n < 1) {
> -               pr_warn("uprobe name '%s' is invalid\n", func_name);
> -               return -EINVAL;
> -       }
> -       if (opts.retprobe && offset != 0) {
> -               free(func);
> -               pr_warn("uretprobes do not support offset specification\n");
> -               return -EINVAL;
> -       }
> -
> -       /* Is func a raw address? */
> -       errno = 0;
> -       raw_offset = strtoull(func, &func_end, 0);
> -       if (!errno && !*func_end) {
> -               free(func);
> -               func = NULL;
> -               offset = (size_t)raw_offset;
> +               break;
> +       case 3:
> +       case 4:
> +               opts.retprobe = str_has_pfx(prog->sec_name, "uretprobe");

you just parsed probe_type, strcmp() against that instead, no need for
prefix check

> +               if (opts.retprobe && offset != 0) {
> +                       pr_warn("uretprobes do not support offset specification\n");
> +                       break;
> +               }
> +               opts.func_name = func_name;
> +               *link = bpf_program__attach_uprobe_opts(prog, -1, binary_path, offset, &opts);
> +               ret = libbpf_get_error(*link);
> +               break;
> +       default:
> +               pr_warn("uprobe name '%s' is invalid\n", prog->sec_name);

Add "prog '%s': " prefix. Also, the section name is not a uprobe
name. Maybe "prog '%s': invalid format of section definition '%s'\n"?

> +               break;
>         }
> -       opts.func_name = func;
> +       free(probe_type);
> +       free(binary_path);
> +       free(func_name);
>
> -       *link = bpf_program__attach_uprobe_opts(prog, -1, binary_path, offset, &opts);
> -       free(func);
> -       return libbpf_get_error(*link);
> +       return ret;
>  }
>
>  struct bpf_link *bpf_program__attach_uprobe(const struct bpf_program *prog,
> diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h
> index b6247dc..155702a 100644
> --- a/tools/lib/bpf/libbpf_internal.h
> +++ b/tools/lib/bpf/libbpf_internal.h
> @@ -103,6 +103,11 @@
>  #define str_has_pfx(str, pfx) \
>         (strncmp(str, pfx, __builtin_constant_p(pfx) ? sizeof(pfx) - 1 : strlen(pfx)) == 0)
>
> +/* similar for suffix */
> +#define str_has_sfx(str, sfx) \
> +       (strlen(sfx) <= strlen(str) ? \
> +        strncmp(str + strlen(str) - strlen(sfx), sfx, strlen(sfx)) == 0 : 0)
> +

so str_has_pfx() is a macro to avoid strlen() for string literals.
Here you don't do any optimization like that and are instead calculating
and recalculating strlen() multiple times. Just make this a static
inline helper function?

and you don't need strncmp() anymore, strcmp() is as safe after all
the strlen() checks and calculations



>  /* Symbol versioning is different between static and shared library.
>   * Properly versioned symbols are needed for shared library, but
>   * only the symbol of the new version is needed for static library.
> --
> 1.8.3.1
>
diff mbox series

Patch

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 91ce94b..3f23e88 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -10750,7 +10750,7 @@  static int resolve_full_path(const char *file, char *result, size_t result_sz)
 	const char *search_paths[3] = {};
 	int i;
 
-	if (strstr(file, ".so")) {
+	if (str_has_sfx(file, ".so") || strstr(file, ".so.")) {
 		search_paths[0] = getenv("LD_LIBRARY_PATH");
 		search_paths[1] = "/usr/lib64:/usr/lib";
 		search_paths[2] = arch_specific_lib_paths();
@@ -10897,60 +10897,43 @@  static int resolve_full_path(const char *file, char *result, size_t result_sz)
 static int attach_uprobe(const struct bpf_program *prog, long cookie, struct bpf_link **link)
 {
 	DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts);
-	char *func, *probe_name, *func_end;
-	char *func_name, binary_path[512];
-	unsigned long long raw_offset;
+	char *probe_type = NULL, *binary_path = NULL, *func_name = NULL;
+	int n, ret = -EINVAL;
 	size_t offset = 0;
-	int n;
 
 	*link = NULL;
 
-	opts.retprobe = str_has_pfx(prog->sec_name, "uretprobe");
-	if (opts.retprobe)
-		probe_name = prog->sec_name + sizeof("uretprobe") - 1;
-	else
-		probe_name = prog->sec_name + sizeof("uprobe") - 1;
-	if (probe_name[0] == '/')
-		probe_name++;
-
-	/* handle SEC("u[ret]probe") - format is valid, but auto-attach is impossible. */
-	if (strlen(probe_name) == 0)
-		return 0;
-
-	snprintf(binary_path, sizeof(binary_path), "%s", probe_name);
-	/* ':' should be prior to function+offset */
-	func_name = strrchr(binary_path, ':');
-	if (!func_name) {
+	n = sscanf(prog->sec_name, "%m[^/]/%m[^:]:%m[a-zA-Z0-9_.]+%zu",
+		   &probe_type, &binary_path, &func_name, &offset);
+	switch (n) {
+	case 1:
+		/* handle SEC("u[ret]probe") - format is valid, but auto-attach is impossible. */
+		ret = 0;
+		break;
+	case 2:
 		pr_warn("section '%s' missing ':function[+offset]' specification\n",
 			prog->sec_name);
-		return -EINVAL;
-	}
-	func_name[0] = '\0';
-	func_name++;
-	n = sscanf(func_name, "%m[a-zA-Z0-9_.]+%li", &func, &offset);
-	if (n < 1) {
-		pr_warn("uprobe name '%s' is invalid\n", func_name);
-		return -EINVAL;
-	}
-	if (opts.retprobe && offset != 0) {
-		free(func);
-		pr_warn("uretprobes do not support offset specification\n");
-		return -EINVAL;
-	}
-
-	/* Is func a raw address? */
-	errno = 0;
-	raw_offset = strtoull(func, &func_end, 0);
-	if (!errno && !*func_end) {
-		free(func);
-		func = NULL;
-		offset = (size_t)raw_offset;
+		break;
+	case 3:
+	case 4:
+		opts.retprobe = str_has_pfx(prog->sec_name, "uretprobe");
+		if (opts.retprobe && offset != 0) {
+			pr_warn("uretprobes do not support offset specification\n");
+			break;
+		}
+		opts.func_name = func_name;
+		*link = bpf_program__attach_uprobe_opts(prog, -1, binary_path, offset, &opts);
+		ret = libbpf_get_error(*link);
+		break;
+	default:
+		pr_warn("uprobe name '%s' is invalid\n", prog->sec_name);
+		break;
 	}
-	opts.func_name = func;
+	free(probe_type);
+	free(binary_path);
+	free(func_name);
 
-	*link = bpf_program__attach_uprobe_opts(prog, -1, binary_path, offset, &opts);
-	free(func);
-	return libbpf_get_error(*link);
+	return ret;
 }
 
 struct bpf_link *bpf_program__attach_uprobe(const struct bpf_program *prog,
diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h
index b6247dc..155702a 100644
--- a/tools/lib/bpf/libbpf_internal.h
+++ b/tools/lib/bpf/libbpf_internal.h
@@ -103,6 +103,11 @@ 
 #define str_has_pfx(str, pfx) \
 	(strncmp(str, pfx, __builtin_constant_p(pfx) ? sizeof(pfx) - 1 : strlen(pfx)) == 0)
 
+/* similar for suffix */
+#define str_has_sfx(str, sfx) \
+	(strlen(sfx) <= strlen(str) ? \
+	 strncmp(str + strlen(str) - strlen(sfx), sfx, strlen(sfx)) == 0 : 0)
+
 /* Symbol versioning is different between static and shared library.
  * Properly versioned symbols are needed for shared library, but
  * only the symbol of the new version is needed for static library.