diff mbox series

[bpf-next,v3] libbpf: introduce legacy kprobe events support

Message ID 20210625044459.1249282-1-rafaeldtinoco@gmail.com (mailing list archive)
State Changes Requested
Delegated to: BPF
Headers show
Series [bpf-next,v3] libbpf: introduce legacy kprobe events support | expand

Checks

Context Check Description
netdev/cover_letter success Link
netdev/fixes_present success Link
netdev/patch_count success Link
netdev/tree_selection success Clearly marked for bpf-next
netdev/subject_prefix success Link
netdev/cc_maintainers fail 9 maintainers not CCed: netdev@vger.kernel.org yhs@fb.com kpsingh@kernel.org daniel@iogearbox.net andrii@kernel.org kafai@fb.com ast@kernel.org john.fastabend@gmail.com songliubraving@fb.com
netdev/source_inline fail Was 0 now: 2
netdev/verify_signedoff success Link
netdev/module_param success Was 0 now: 0
netdev/build_32bit success Errors and warnings before: 0 this patch: 0
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/verify_fixes success Link
netdev/checkpatch fail CHECK: Alignment should match open parenthesis CHECK: spaces preferred around that '|' (ctx:VxV) ERROR: "foo* bar" should be "foo *bar" ERROR: do not use assignment in if condition ERROR: space required after that ',' (ctx:VxV) WARNING: line length of 81 exceeds 80 columns WARNING: line length of 85 exceeds 80 columns WARNING: line length of 93 exceeds 80 columns
netdev/build_allmodconfig_warn success Errors and warnings before: 0 this patch: 0
netdev/header_inline success Link

Commit Message

Rafael David Tinoco June 25, 2021, 4:44 a.m. UTC
Allow kprobe tracepoint events creation through legacy interface, as the
kprobe dynamic PMUs support, used by default, was only created in v4.17.

This enables CO.RE support for older kernels.

Signed-off-by: Rafael David Tinoco <rafaeldtinoco@gmail.com>
---
 tools/lib/bpf/libbpf.c | 125 ++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 123 insertions(+), 2 deletions(-)

Comments

Rafael David Tinoco June 25, 2021, 5:01 a.m. UTC | #1
> Allow kprobe tracepoint events creation through legacy interface, as the
> kprobe dynamic PMUs support, used by default, was only created in v4.17.
> 
> This enables CO.RE support for older kernels.
> 
> Signed-off-by: Rafael David Tinoco <rafaeldtinoco@gmail.com>

Related to:
https://github.com/libbpf/libbpf/issues/317

> ---
> tools/lib/bpf/libbpf.c | 125 ++++++++++++++++++++++++++++++++++++++++-
> 1 file changed, 123 insertions(+), 2 deletions(-)
> 
> diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
> index 1e04ce724240..72a22c4d8295 100644
> --- a/tools/lib/bpf/libbpf.c
> +++ b/tools/lib/bpf/libbpf.c

[snip]

> static int bpf_link__detach_perf_event(struct bpf_link *link)
> {
> 	int err;
> @@ -10152,6 +10197,12 @@ static int bpf_link__detach_perf_event(struct bpf_link *link)
> 		err = -errno;
> 
> 	close(link->fd);

It needed the perf event fd closure for the ‘kprobe_events’ to allow releasing.

> +
> +	if (link->legacy.name) {
> +		remove_kprobe_event_legacy(link->legacy.name, link->legacy.retprobe);
> +		free(link->legacy.name);
> +	}
> +
> 	return libbpf_err(err);
> }

[snip]

Tested with: https://github.com/rafaeldtinoco/portablebpf (w/ CO.RE) in kernels 5.8 and 4.15.
Rafael David Tinoco July 7, 2021, 1:38 p.m. UTC | #2
On Fri, Jun 25, 2021, at 01:44, Rafael David Tinoco wrote:
> Allow kprobe tracepoint events creation through legacy interface, as the
> kprobe dynamic PMUs support, used by default, was only created in v4.17.
> 
> This enables CO.RE support for older kernels.
> 
> Signed-off-by: Rafael David Tinoco <rafaeldtinoco@gmail.com>

ping
Andrii Nakryiko July 7, 2021, 9:52 p.m. UTC | #3
On Thu, Jun 24, 2021 at 9:45 PM Rafael David Tinoco
<rafaeldtinoco@gmail.com> wrote:
>
> Allow kprobe tracepoint events creation through legacy interface, as the
> kprobe dynamic PMUs support, used by default, was only created in v4.17.
>
> This enables CO.RE support for older kernels.
>
> Signed-off-by: Rafael David Tinoco <rafaeldtinoco@gmail.com>
> ---

Sorry for the delay, I've been out on vacation for the last two weeks.

>  tools/lib/bpf/libbpf.c | 125 ++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 123 insertions(+), 2 deletions(-)
>
> diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
> index 1e04ce724240..72a22c4d8295 100644
> --- a/tools/lib/bpf/libbpf.c
> +++ b/tools/lib/bpf/libbpf.c
> @@ -10007,6 +10007,10 @@ struct bpf_link {
>         char *pin_path;         /* NULL, if not pinned */
>         int fd;                 /* hook FD, -1 if not applicable */
>         bool disconnected;
> +       struct {
> +               char *name;
> +               bool retprobe;
> +       } legacy;

we shouldn't extend common bpf_link with kprobe-specific parts. We
used to have something like this (for other use cases):

struct bpf_link_kprobe {
    struct bpf_link link;
    char *legacy_name;
    bool is_retprobe;
};

And then internally do container_of() to "cast" struct bpf_link to
struct bpf_link_kprobe. External API should still operate on struct
bpf_link everywhere.

>  };
>
>  /* Replace link's underlying BPF program with the new one */
> @@ -10143,6 +10147,47 @@ int bpf_link__unpin(struct bpf_link *link)
>         return 0;
>  }
>
> +static int poke_kprobe_events(bool add, const char *name, bool retprobe)
> +{
> +       int fd, ret = 0;
> +       char probename[32], cmd[160];
> +       const char *file = "/sys/kernel/debug/tracing/kprobe_events";
> +
> +       memset(probename, 0, sizeof(probename));

nit: char probename[32] = {} instead of explicit memset

32 seems pretty small, though, is that kernel restriction? if not,
let's maybe bump it to 128 or so?

> +
> +       if (retprobe)
> +               ret = snprintf(probename, sizeof(probename), "kprobes/%s_ret", name);
> +       else
> +               ret = snprintf(probename, sizeof(probename), "kprobes/%s", name);

BCC seems to be adding _bcc_<pid> prefix, so maybe let's do the same
here with s/bcc/libbpf/?

> +
> +       if (ret <= strlen("kprobes/"))
> +               return -EINVAL;

why would snprintf() fail at all?

> +
> +       if (add)
> +               snprintf(cmd, sizeof(cmd),"%c:%s %s", retprobe ? 'r' : 'p', probename, name);

missing space before "

> +       else
> +               snprintf(cmd, sizeof(cmd), "-:%s", probename);
> +
> +       if (!(fd = open(file, O_WRONLY|O_APPEND, 0)))

let's not do these assignments inside if, we are not trying to save
lines of code here, but it's harder to read

also need spaces around operator |

> +               return -errno;
> +       if ((ret = write(fd, cmd, strlen(cmd))) < 0)
> +               ret = -errno;
> +
> +       close(fd);
> +
> +       return ret;
> +}
> +
> +static inline int add_kprobe_event_legacy(const char* name, bool retprobe)

char *name

> +{
> +       return poke_kprobe_events(true, name, retprobe);
> +}
> +
> +static inline int remove_kprobe_event_legacy(const char* name, bool retprobe)

char *name

> +{
> +       return poke_kprobe_events(false, name, retprobe);
> +}
> +
>  static int bpf_link__detach_perf_event(struct bpf_link *link)
>  {
>         int err;
> @@ -10152,6 +10197,12 @@ static int bpf_link__detach_perf_event(struct bpf_link *link)
>                 err = -errno;
>
>         close(link->fd);
> +
> +       if (link->legacy.name) {
> +               remove_kprobe_event_legacy(link->legacy.name, link->legacy.retprobe);
> +               free(link->legacy.name);
> +       }

instead of this check in bpf_link__detach_perf_event, attach_kprobe
should install its own bpf_link__detach_kprobe_legacy callback

> +
>         return libbpf_err(err);
>  }
>
> @@ -10229,6 +10280,23 @@ static int parse_uint_from_file(const char *file, const char *fmt)
>         return ret;
>  }
>
> +static bool determine_kprobe_legacy(void)
> +{
> +       const char *file = "/sys/bus/event_source/devices/kprobe/type";
> +
> +       return access(file, 0) == 0 ? false : true;
> +}
> +
> +static int determine_kprobe_perf_type_legacy(const char *func_name)
> +{
> +       char file[96];
> +       const char *fname = "/sys/kernel/debug/tracing/events/kprobes/%s/id";
> +
> +       snprintf(file, sizeof(file), fname, func_name);
> +
> +       return parse_uint_from_file(file, "%d\n");
> +}
> +
>  static int determine_kprobe_perf_type(void)
>  {
>         const char *file = "/sys/bus/event_source/devices/kprobe/type";
> @@ -10304,6 +10372,43 @@ static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name,
>         return pfd;
>  }
>
> +static int perf_event_open_probe_legacy(bool uprobe, bool retprobe, const char *name,
> +                                       uint64_t offset, int pid)

you are not using offset here, let's pass it into
add_kprobe_event_legacy and use it when attaching as "p:kprobes/%s
%s+123" in poke_kprobe_events? There are separate patches that are
adding ability to attach kprobe at offset, so let's support that
(internally) from the get go for legacy case as well.

also, it's not generic perf_event_open, it's specifically kprobe, so
let's call it with kprobe in the name (e.g., kprobe_open_legacy or
something)

> +{
> +       struct perf_event_attr attr = {};
> +       char errmsg[STRERR_BUFSIZE];
> +       int type, pfd, err;
> +
> +       if (uprobe) // unsupported

please don't use C++ style comments

> +               return -EINVAL;

was about to suggest using -ENOTSUP instead, but we shouldn't even
pass bool uprobe, as it's not yet used in uprobes. We can add it
later, if necessary.

> +
> +       if ((err = add_kprobe_event_legacy(name, retprobe)) < 0) {

same, here and below, let's not do unnecessary assignments inside the if clause

> +               pr_warn("failed to add legacy kprobe event: %s\n",
> +               libbpf_strerror_r(err, errmsg, sizeof(errmsg)));

align with the "failed ..." argument, it looks like independent
statement, not an argument

> +               return err;
> +       }
> +       if ((type = determine_kprobe_perf_type_legacy(name)) < 0) {
> +               pr_warn("failed to determine legacy kprobe event id: %s\n",
> +               libbpf_strerror_r(type, errmsg, sizeof(errmsg)));

same as above

> +               return type;
> +       }
> +       attr.size = sizeof(attr);
> +       attr.config = type;
> +       attr.type = PERF_TYPE_TRACEPOINT;
> +
> +       pfd = syscall(__NR_perf_event_open, &attr,
> +                     pid < 0 ? -1 : pid, /* pid */
> +                     pid == -1 ? 0 : -1, /* cpu */
> +                     -1 /* group_fd */,  PERF_FLAG_FD_CLOEXEC);
> +       if (pfd < 0) {
> +               err = -errno;
> +               pr_warn("legacy kprobe perf_event_open() failed: %s\n",
> +                       libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
> +               return err;
> +       }
> +       return pfd;
> +}
> +
>  struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog,
>                                             bool retprobe,
>                                             const char *func_name)
> @@ -10311,9 +10416,18 @@ struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog,
>         char errmsg[STRERR_BUFSIZE];
>         struct bpf_link *link;
>         int pfd, err;
> +       bool legacy = false;

no need to initialize it, you unconditionally assign it below

>
> -       pfd = perf_event_open_probe(false /* uprobe */, retprobe, func_name,
> -                                   0 /* offset */, -1 /* pid */);
> +       if (!(legacy = determine_kprobe_legacy()))

another assignment in if

> +               pfd = perf_event_open_probe(false /* uprobe */,
> +                                           retprobe, func_name,
> +                                            0 /* offset */,
> +                                           -1 /* pid */);
> +       else
> +               pfd = perf_event_open_probe_legacy(false /* uprobe */,
> +                                           retprobe, func_name,
> +                                            0 /* offset */,
> +                                           -1 /* pid */);
>         if (pfd < 0) {
>                 pr_warn("prog '%s': failed to create %s '%s' perf event: %s\n",
>                         prog->name, retprobe ? "kretprobe" : "kprobe", func_name,

we can't use bpf_program__attach_perf_event as is now, because we need
to allocate a different struct bpf_link_kprobe. Let's extract the
PERF_EVENT_IOC_SET_BPF and PERF_EVENT_IOC_ENABLE logic into a helper
and use it from both bpf_program__attach_perf_event and
bpf_program__attach_kprobe. It's actually good because we can check
silly errors (like prog_fd < 0) before we create perf_event FD now.

> @@ -10329,6 +10443,13 @@ struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog,
>                         libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
>                 return libbpf_err_ptr(err);
>         }
> +
> +       if (legacy) {
> +               /* needed history for the legacy probe cleanup */
> +               link->legacy.name = strdup(func_name);
> +               link->legacy.retprobe = retprobe;
> +       }
> +
>         return link;
>  }
>
> --
> 2.27.0
>
Rafael David Tinoco July 19, 2021, 1:59 a.m. UTC | #4
> > --- a/tools/lib/bpf/libbpf.c
> > +++ b/tools/lib/bpf/libbpf.c
> > @@ -10007,6 +10007,10 @@ struct bpf_link {
> >         char *pin_path;         /* NULL, if not pinned */
> >         int fd;                 /* hook FD, -1 if not applicable */
> >         bool disconnected;
> > +       struct {
> > +               char *name;
> > +               bool retprobe;
> > +       } legacy;
>
> we shouldn't extend common bpf_link with kprobe-specific parts. We
> used to have something like this (for other use cases):
>
> struct bpf_link_kprobe {
>     struct bpf_link link;
>     char *legacy_name;
>     bool is_retprobe;
> };

would this:

struct bpf_link {
    int (*detach)(struct bpf_link *link);
    int (*destroy)(struct bpf_link *link);
    char *pin_path;
    int fd;
    bool disconnected;
};

struct bpf_link_kprobe {
    char *legacy_name;
    bool is_retprobe;
    struct bpf_link *link;
};

be ok ?

> And then internally do container_of() to "cast" struct bpf_link to
> struct bpf_link_kprobe. External API should still operate on struct
> bpf_link everywhere.

and what about this:

static struct bpf_link*
bpf_program__attach_kprobe_opts(struct bpf_program *prog,
                                const char *func_name,
                                struct bpf_program_attach_kprobe_opts *opts)
{
	char errmsg[STRERR_BUFSIZE];
	struct bpf_link_kprobe *kplink;
	int pfd, err;
	bool legacy;

	legacy = determine_kprobe_legacy();
	if (!legacy) {
		pfd = perf_event_open_probe(false /* uprobe */,
		                            opts->retprobe,
		                            func_name,
		                            0 /* offset */,
		                            -1 /* pid */);
	} else {
		pfd = perf_event_open_kprobe_legacy(opts->retprobe,
		                                    func_name,
		                                    0 /* offset */,
		                                    -1 /* pid */);
	}
	if (pfd < 0) {
		pr_warn("prog '%s': failed to create %s '%s' perf event: %s\n",
		        prog->name, opts->retprobe ? "kretprobe" : "kprobe", func_name,
		        libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
		return libbpf_err_ptr(pfd);
	}
	kplink = calloc(1, sizeof(struct bpf_link_kprobe));
	if (!kplink)
		return libbpf_err_ptr(-ENOMEM);
	kplink->link = bpf_program__attach_perf_event(prog, pfd);
	err = libbpf_get_error(link);
	if (err) {
		free(kplink);
		close(pfd);
		pr_warn("prog '%s': failed to attach to %s '%s': %s\n",
		        prog->name, opts->retprobe ? "kretprobe" : "kprobe", func_name,
		        libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
		return libbpf_err_ptr(err);
	}
	if (legacy) {
		kplink->legacy_name = strdup(func_name);
		kplink->is_retprobe = opts->retprobe;
	}

	return kplink->link;
}

And use this 'kplink->link' pointer as the bpf_link pointer for all kprobe
functions. For the detachment we would have something like:

static int bpf_link__detach_perf_event(struct bpf_link *link) {
	struct bpf_link *const *plink;
	struct bpf_link_kprobe *kplink;
	int err;

	plink = (struct bpf_link *const *) link;
	kplink = container_of(plink, struct bpf_link_kprobe, link);
	err = ioctl(link->fd, PERF_EVENT_IOC_DISABLE, 0);
	if (err)
		err = -errno;
	close(link->fd);
	if (kplink) {
		remove_kprobe_event_legacy(kplink->legacy_name, kplink->is_retprobe);
		free(kplink->legacy_name);
		free(kplink);
	}

	return libbpf_err(err);
}

for the bpf_link__detach_perf_event(): This would also clean the container at
the detachment. (Next comment talks about having this here versus having a
legacy kprobe detachment callback).

[snip]

> >  static int bpf_link__detach_perf_event(struct bpf_link *link)
> >  {
> >         int err;
> > @@ -10152,6 +10197,12 @@ static int bpf_link__detach_perf_event(struct bpf_link *link)
> >                 err = -errno;
> >
> >         close(link->fd);
> > +
> > +       if (link->legacy.name) {
> > +               remove_kprobe_event_legacy(link->legacy.name, link->legacy.retprobe);
> > +               free(link->legacy.name);
> > +       }
>
> instead of this check in bpf_link__detach_perf_event, attach_kprobe
> should install its own bpf_link__detach_kprobe_legacy callback

attach_kprobe_opts() could pass a pointer to link->detach->callback through the
opts I suppose (or now, the kplink->link->detach->callback). This way the
default would still be bpf_link__detach_perf_event() but we could create a
function bpf_link__detach_perf_event_legacy_kprobe() with what was previously
showed (about kplink freeing). This is not needed with the version showed
before the [snip] though.

> > +static int perf_event_open_probe_legacy(bool uprobe, bool retprobe, const char *name,
> > +                                       uint64_t offset, int pid)
>
> you are not using offset here, let's pass it into
> add_kprobe_event_legacy and use it when attaching as "p:kprobes/%s
> %s+123" in poke_kprobe_events? There are separate patches that are
> adding ability to attach kprobe at offset, so let's support that
> (internally) from the get go for legacy case as well.
>
> also, it's not generic perf_event_open, it's specifically kprobe, so
> let's call it with kprobe in the name (e.g., kprobe_open_legacy or
> something)

I'm calling it now perf_event_open_kprobe_legacy() and it calls:

static inline int add_kprobe_event_legacy(const char *name, bool retprobe, uint64_t offset)
{
	return poke_kprobe_events(true, name, retprobe, offset);
}

and then we set {kprobes/kretprobes}/funcname_pid, also supporting offset:

static int poke_kprobe_events(bool add, const char *name, bool retprobe, uint64_t offset) {
	int fd, ret = 0;
	char cmd[192] = {}, probename[128] = {}, probefunc[128] = {};
	const char *file = "/sys/kernel/debug/tracing/kprobe_events";

	if (retprobe)
		ret = snprintf(probename, sizeof(probename), "kretprobes/%s_libbpf_%u", name, getpid());
	else
		ret = snprintf(probename, sizeof(probename), "kprobes/%s_libbpf_%u", name, getpid());
	if (offset)
		ret = snprintf(probefunc, sizeof(probefunc), "%s+%lu", name, offset);
	if (ret)
		return -EINVAL;
	if (add) {
		snprintf(cmd, sizeof(cmd), "%c:%s %s",
				 retprobe ? 'r' : 'p',
				 probename,
		         offset ? probefunc : name);
	} else {
		snprintf(cmd, sizeof(cmd), "-:%s", probename);
	}
	fd = open(file, O_WRONLY | O_APPEND, 0);
	if (!fd)
		return -errno;
	ret = write(fd, cmd, strlen(cmd));
	if (ret)
		ret = -errno;
	close(fd);

	return ret;
}

[snip]

> > +               pfd = perf_event_open_probe(false /* uprobe */,
> > +                                           retprobe, func_name,
> > +                                            0 /* offset */,
> > +                                           -1 /* pid */);
> > +       else
> > +               pfd = perf_event_open_probe_legacy(false /* uprobe */,
> > +                                           retprobe, func_name,
> > +                                            0 /* offset */,
> > +                                           -1 /* pid */);
> >         if (pfd < 0) {
> >                 pr_warn("prog '%s': failed to create %s '%s' perf event: %s\n",
> >                         prog->name, retprobe ? "kretprobe" : "kprobe", func_name,
>
> we can't use bpf_program__attach_perf_event as is now, because we need
> to allocate a different struct bpf_link_kprobe.

We could do the container encapsulation using heap in
bpf_program_attach_kprobe_opts() or attach_kprobe() like I'm showing here, no ?

...
	kplink = calloc(1, sizeof(struct bpf_link_kprobe));
	if (!kplink)
		return libbpf_err_ptr(-ENOMEM);
	kplink->link = bpf_program__attach_perf_event(prog, pfd);
...

and then free all this structure (bpf_link and its encapsulation at the
detachment, like said previously also). This way we don't have to change
bpf_program__attach_perf_event() which would continue to serve
bpf_program__attach_tracepoint() and bpf_program__attach_uprobe() unmodified.
This way, kprobe would have a container for all cases and uprobe and tracepoint
could have a container in the future if needed.

> Let's extract the
> PERF_EVENT_IOC_SET_BPF and PERF_EVENT_IOC_ENABLE logic into a helper
> and use it from both bpf_program__attach_perf_event and
> bpf_program__attach_kprobe. It's actually good because we can check
> silly errors (like prog_fd < 0) before we create perf_event FD now.

Okay, but I'm considering this orthogonal to what you said previously (on
changing bpf_program__attach_perf_event). UNLESS you really prefer me to do the
container allocation in bpf_program__attach_perf_event() but then we would have
to free the container in all detachments (kprobe, tracepoint and uprobe) as it
couldn't be placed in stack (or it would eventually be lost, no ?).
Andrii Nakryiko July 20, 2021, 12:10 a.m. UTC | #5
On Sun, Jul 18, 2021 at 6:59 PM Rafael David Tinoco
<rafaeldtinoco@gmail.com> wrote:
>
> > > --- a/tools/lib/bpf/libbpf.c
> > > +++ b/tools/lib/bpf/libbpf.c
> > > @@ -10007,6 +10007,10 @@ struct bpf_link {
> > >         char *pin_path;         /* NULL, if not pinned */
> > >         int fd;                 /* hook FD, -1 if not applicable */
> > >         bool disconnected;
> > > +       struct {
> > > +               char *name;
> > > +               bool retprobe;
> > > +       } legacy;
> >
> > we shouldn't extend common bpf_link with kprobe-specific parts. We
> > used to have something like this (for other use cases):
> >
> > struct bpf_link_kprobe {
> >     struct bpf_link link;
> >     char *legacy_name;
> >     bool is_retprobe;
> > };
>
> would this:
>
> struct bpf_link {
>     int (*detach)(struct bpf_link *link);
>     int (*destroy)(struct bpf_link *link);
>     char *pin_path;
>     int fd;
>     bool disconnected;
> };
>
> struct bpf_link_kprobe {
>     char *legacy_name;
>     bool is_retprobe;
>     struct bpf_link *link;
> };
>
> be ok ?

No.

>
> > And then internally do container_of() to "cast" struct bpf_link to
> > struct bpf_link_kprobe. External API should still operate on struct
> > bpf_link everywhere.
>
> and what about this:
>
> static struct bpf_link*
> bpf_program__attach_kprobe_opts(struct bpf_program *prog,
>                                 const char *func_name,
>                                 struct bpf_program_attach_kprobe_opts *opts)
> {
>         char errmsg[STRERR_BUFSIZE];
>         struct bpf_link_kprobe *kplink;
>         int pfd, err;
>         bool legacy;
>
>         legacy = determine_kprobe_legacy();
>         if (!legacy) {
>                 pfd = perf_event_open_probe(false /* uprobe */,
>                                             opts->retprobe,
>                                             func_name,
>                                             0 /* offset */,
>                                             -1 /* pid */);
>         } else {
>                 pfd = perf_event_open_kprobe_legacy(opts->retprobe,
>                                                     func_name,
>                                                     0 /* offset */,
>                                                     -1 /* pid */);
>         }
>         if (pfd < 0) {
>                 pr_warn("prog '%s': failed to create %s '%s' perf event: %s\n",
>                         prog->name, opts->retprobe ? "kretprobe" : "kprobe", func_name,
>                         libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
>                 return libbpf_err_ptr(pfd);
>         }
>         kplink = calloc(1, sizeof(struct bpf_link_kprobe));
>         if (!kplink)
>                 return libbpf_err_ptr(-ENOMEM);
>         kplink->link = bpf_program__attach_perf_event(prog, pfd);
>         err = libbpf_get_error(link);
>         if (err) {
>                 free(kplink);
>                 close(pfd);
>                 pr_warn("prog '%s': failed to attach to %s '%s': %s\n",
>                         prog->name, opts->retprobe ? "kretprobe" : "kprobe", func_name,
>                         libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
>                 return libbpf_err_ptr(err);
>         }
>         if (legacy) {
>                 kplink->legacy_name = strdup(func_name);
>                 kplink->is_retprobe = opts->retprobe;
>         }
>
>         return kplink->link;
> }
>
> And use this 'kplink->link' pointer as the bpf_link pointer for all kprobe
> functions. For the detachment we would have something like:
>
> static int bpf_link__detach_perf_event(struct bpf_link *link) {
>         struct bpf_link *const *plink;
>         struct bpf_link_kprobe *kplink;
>         int err;
>
>         plink = (struct bpf_link *const *) link;
>         kplink = container_of(plink, struct bpf_link_kprobe, link);

Did you check if this works? Also how that could ever even work for
non-kprobe but perf_event-based cases, like tracepoint? And why are we
even discussing these "alternatives"? What's wrong with the way I
proposed earlier? For container_of() to work you have to do it the way
I described in my previous email with one struct being embedded in
another one.

>         err = ioctl(link->fd, PERF_EVENT_IOC_DISABLE, 0);
>         if (err)
>                 err = -errno;
>         close(link->fd);
>         if (kplink) {
>                 remove_kprobe_event_legacy(kplink->legacy_name, kplink->is_retprobe);
>                 free(kplink->legacy_name);
>                 free(kplink);
>         }
>
>         return libbpf_err(err);
> }
>
> for the bpf_link__detach_perf_event(): This would also clean the container at
> the detachment. (Next comment talks about having this here versus having a
> legacy kprobe detachment callback).
>
> [snip]
>
> > >  static int bpf_link__detach_perf_event(struct bpf_link *link)
> > >  {
> > >         int err;
> > > @@ -10152,6 +10197,12 @@ static int bpf_link__detach_perf_event(struct bpf_link *link)
> > >                 err = -errno;
> > >
> > >         close(link->fd);
> > > +
> > > +       if (link->legacy.name) {
> > > +               remove_kprobe_event_legacy(link->legacy.name, link->legacy.retprobe);
> > > +               free(link->legacy.name);
> > > +       }
> >
> > instead of this check in bpf_link__detach_perf_event, attach_kprobe
> > should install its own bpf_link__detach_kprobe_legacy callback
>
> attach_kprobe_opts() could pass a pointer to link->detach->callback through the
> opts I suppose (or now, the kplink->link->detach->callback). This way the
> default would still be bpf_link__detach_perf_event() but we could create a
> function bpf_link__detach_perf_event_legacy_kprobe() with what was previously
> showed (about kplink freeing). This is not needed with the version showed
> before the [snip] though.
>
> > > +static int perf_event_open_probe_legacy(bool uprobe, bool retprobe, const char *name,
> > > +                                       uint64_t offset, int pid)
> >
> > you are not using offset here, let's pass it into
> > add_kprobe_event_legacy and use it when attaching as "p:kprobes/%s
> > %s+123" in poke_kprobe_events? There are separate patches that are
> > adding ability to attach kprobe at offset, so let's support that
> > (internally) from the get go for legacy case as well.
> >
> > also, it's not generic perf_event_open, it's specifically kprobe, so
> > let's call it with kprobe in the name (e.g., kprobe_open_legacy or
> > something)
>
> I'm calling it now perf_event_open_kprobe_legacy() and it calls:
>
> static inline int add_kprobe_event_legacy(const char *name, bool retprobe, uint64_t offset)
> {
>         return poke_kprobe_events(true, name, retprobe, offset);
> }
>
> and then we set {kprobes/kretprobes}/funcname_pid, also supporting offset:
>
> static int poke_kprobe_events(bool add, const char *name, bool retprobe, uint64_t offset) {
>         int fd, ret = 0;
>         char cmd[192] = {}, probename[128] = {}, probefunc[128] = {};
>         const char *file = "/sys/kernel/debug/tracing/kprobe_events";
>
>         if (retprobe)
>                 ret = snprintf(probename, sizeof(probename), "kretprobes/%s_libbpf_%u", name, getpid());
>         else
>                 ret = snprintf(probename, sizeof(probename), "kprobes/%s_libbpf_%u", name, getpid());
>         if (offset)
>                 ret = snprintf(probefunc, sizeof(probefunc), "%s+%lu", name, offset);
>         if (ret)
>                 return -EINVAL;
>         if (add) {
>                 snprintf(cmd, sizeof(cmd), "%c:%s %s",
>                                  retprobe ? 'r' : 'p',
>                                  probename,
>                          offset ? probefunc : name);
>         } else {
>                 snprintf(cmd, sizeof(cmd), "-:%s", probename);
>         }
>         fd = open(file, O_WRONLY | O_APPEND, 0);
>         if (!fd)
>                 return -errno;
>         ret = write(fd, cmd, strlen(cmd));
>         if (ret)
>                 ret = -errno;
>         close(fd);
>
>         return ret;
> }
>
> [snip]
>
> > > +               pfd = perf_event_open_probe(false /* uprobe */,
> > > +                                           retprobe, func_name,
> > > +                                            0 /* offset */,
> > > +                                           -1 /* pid */);
> > > +       else
> > > +               pfd = perf_event_open_probe_legacy(false /* uprobe */,
> > > +                                           retprobe, func_name,
> > > +                                            0 /* offset */,
> > > +                                           -1 /* pid */);
> > >         if (pfd < 0) {
> > >                 pr_warn("prog '%s': failed to create %s '%s' perf event: %s\n",
> > >                         prog->name, retprobe ? "kretprobe" : "kprobe", func_name,
> >
> > we can't use bpf_program__attach_perf_event as is now, because we need
> > to allocate a different struct bpf_link_kprobe.
>
> We could do the container encapsulation using heap in
> bpf_program_attach_kprobe_opts() or attach_kprobe() like I'm showing here, no ?
>
> ...
>         kplink = calloc(1, sizeof(struct bpf_link_kprobe));
>         if (!kplink)
>                 return libbpf_err_ptr(-ENOMEM);
>         kplink->link = bpf_program__attach_perf_event(prog, pfd);
> ...
>
> and then free all this structure (bpf_link and its encapsulation at the
> detachment, like said previously also). This way we don't have to change
> bpf_program__attach_perf_event() which would continue to serve
> bpf_program__attach_tracepoint() and bpf_program__attach_uprobe() unmodified.
> This way, kprobe would have a container for all cases and uprobe and tracepoint
> could have a container in the future if needed.
>
> > Let's extract the
> > PERF_EVENT_IOC_SET_BPF and PERF_EVENT_IOC_ENABLE logic into a helper
> > and use it from both bpf_program__attach_perf_event and
> > bpf_program__attach_kprobe. It's actually good because we can check
> > silly errors (like prog_fd < 0) before we create perf_event FD now.
>
> Okay, but I'm considering this orthogonal to what you said previously (on
> changing bpf_program__attach_perf_event). UNLESS you really prefer me to do the
> container allocation in bpf_program__attach_perf_event() but then we would have
> to free the container in all detachments (kprobe, tracepoint and uprobe) as it
> couldn't be placed in stack (or it would eventually be lost, no ?).

container will be different for kprobe/uprobe and
tracepoint/perf_event. So allocation has to happen separately from
PERF_EVENT_IOC_SET_BPF and PERF_EVENT_IOC_ENABLE.
Rafael David Tinoco July 20, 2021, 4:16 a.m. UTC | #6
> >         plink = (struct bpf_link *const *) link;
> >         kplink = container_of(plink, struct bpf_link_kprobe, link);
> 
> Did you check if this works? Also how that could ever even work for
> non-kprobe but perf_event-based cases, like tracepoint? And why are we
> even discussing these "alternatives"? What's wrong with the way I
> proposed earlier? For container_of() to work you have to do it the way
> I described in my previous email with one struct being embedded in
> another one.

Nevermind. Sorry for the noise. I'll do the container encapsulation like
you said so. I was being lazy and trying to find a way to change just the
kprobes path instead of thinking in the overall project.

Sorry again, will come up with what you said in the first place.
diff mbox series

Patch

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 1e04ce724240..72a22c4d8295 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -10007,6 +10007,10 @@  struct bpf_link {
 	char *pin_path;		/* NULL, if not pinned */
 	int fd;			/* hook FD, -1 if not applicable */
 	bool disconnected;
+	struct {
+		char *name;
+		bool retprobe;
+	} legacy;
 };
 
 /* Replace link's underlying BPF program with the new one */
@@ -10143,6 +10147,47 @@  int bpf_link__unpin(struct bpf_link *link)
 	return 0;
 }
 
+static int poke_kprobe_events(bool add, const char *name, bool retprobe)
+{
+	int fd, ret = 0;
+	char probename[32], cmd[160];
+	const char *file = "/sys/kernel/debug/tracing/kprobe_events";
+
+	memset(probename, 0, sizeof(probename));
+
+	if (retprobe)
+		ret = snprintf(probename, sizeof(probename), "kprobes/%s_ret", name);
+	else
+		ret = snprintf(probename, sizeof(probename), "kprobes/%s", name);
+
+	if (ret <= strlen("kprobes/"))
+		return -EINVAL;
+
+	if (add)
+		snprintf(cmd, sizeof(cmd),"%c:%s %s", retprobe ? 'r' : 'p', probename, name);
+	else
+		snprintf(cmd, sizeof(cmd), "-:%s", probename);
+
+	if (!(fd = open(file, O_WRONLY|O_APPEND, 0)))
+		return -errno;
+	if ((ret = write(fd, cmd, strlen(cmd))) < 0)
+		ret = -errno;
+
+	close(fd);
+
+	return ret;
+}
+
+static inline int add_kprobe_event_legacy(const char* name, bool retprobe)
+{
+	return poke_kprobe_events(true, name, retprobe);
+}
+
+static inline int remove_kprobe_event_legacy(const char* name, bool retprobe)
+{
+	return poke_kprobe_events(false, name, retprobe);
+}
+
 static int bpf_link__detach_perf_event(struct bpf_link *link)
 {
 	int err;
@@ -10152,6 +10197,12 @@  static int bpf_link__detach_perf_event(struct bpf_link *link)
 		err = -errno;
 
 	close(link->fd);
+
+	if (link->legacy.name) {
+		remove_kprobe_event_legacy(link->legacy.name, link->legacy.retprobe);
+		free(link->legacy.name);
+	}
+
 	return libbpf_err(err);
 }
 
@@ -10229,6 +10280,23 @@  static int parse_uint_from_file(const char *file, const char *fmt)
 	return ret;
 }
 
+static bool determine_kprobe_legacy(void)
+{
+	const char *file = "/sys/bus/event_source/devices/kprobe/type";
+
+	return access(file, 0) == 0 ? false : true;
+}
+
+static int determine_kprobe_perf_type_legacy(const char *func_name)
+{
+	char file[96];
+	const char *fname = "/sys/kernel/debug/tracing/events/kprobes/%s/id";
+
+	snprintf(file, sizeof(file), fname, func_name);
+
+	return parse_uint_from_file(file, "%d\n");
+}
+
 static int determine_kprobe_perf_type(void)
 {
 	const char *file = "/sys/bus/event_source/devices/kprobe/type";
@@ -10304,6 +10372,43 @@  static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name,
 	return pfd;
 }
 
+static int perf_event_open_probe_legacy(bool uprobe, bool retprobe, const char *name,
+					uint64_t offset, int pid)
+{
+	struct perf_event_attr attr = {};
+	char errmsg[STRERR_BUFSIZE];
+	int type, pfd, err;
+
+	if (uprobe) // unsupported
+		return -EINVAL;
+
+	if ((err = add_kprobe_event_legacy(name, retprobe)) < 0) {
+		pr_warn("failed to add legacy kprobe event: %s\n",
+		libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
+		return err;
+	}
+	if ((type = determine_kprobe_perf_type_legacy(name)) < 0) {
+		pr_warn("failed to determine legacy kprobe event id: %s\n",
+		libbpf_strerror_r(type, errmsg, sizeof(errmsg)));
+		return type;
+	}
+	attr.size = sizeof(attr);
+	attr.config = type;
+	attr.type = PERF_TYPE_TRACEPOINT;
+
+	pfd = syscall(__NR_perf_event_open, &attr,
+		      pid < 0 ? -1 : pid, /* pid */
+		      pid == -1 ? 0 : -1, /* cpu */
+		      -1 /* group_fd */,  PERF_FLAG_FD_CLOEXEC);
+	if (pfd < 0) {
+		err = -errno;
+		pr_warn("legacy kprobe perf_event_open() failed: %s\n",
+			libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
+		return err;
+	}
+	return pfd;
+}
+
 struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog,
 					    bool retprobe,
 					    const char *func_name)
@@ -10311,9 +10416,18 @@  struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog,
 	char errmsg[STRERR_BUFSIZE];
 	struct bpf_link *link;
 	int pfd, err;
+	bool legacy = false;
 
-	pfd = perf_event_open_probe(false /* uprobe */, retprobe, func_name,
-				    0 /* offset */, -1 /* pid */);
+	if (!(legacy = determine_kprobe_legacy()))
+		pfd = perf_event_open_probe(false /* uprobe */,
+					    retprobe, func_name,
+					     0 /* offset */,
+					    -1 /* pid */);
+	else
+		pfd = perf_event_open_probe_legacy(false /* uprobe */,
+					    retprobe, func_name,
+					     0 /* offset */,
+					    -1 /* pid */);
 	if (pfd < 0) {
 		pr_warn("prog '%s': failed to create %s '%s' perf event: %s\n",
 			prog->name, retprobe ? "kretprobe" : "kprobe", func_name,
@@ -10329,6 +10443,13 @@  struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog,
 			libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
 		return libbpf_err_ptr(err);
 	}
+
+	if (legacy) {
+		/* needed history for the legacy probe cleanup */
+		link->legacy.name = strdup(func_name);
+		link->legacy.retprobe = retprobe;
+	}
+
 	return link;
 }