diff mbox series

[v5,2/9] fprobe: Add ftrace based probe APIs

Message ID 164311271777.1933078.9066058105807126444.stgit@devnote2 (mailing list archive)
State Superseded
Headers show
Series fprobe: Introduce fprobe function entry/exit probe | expand

Checks

Context Check Description
netdev/tree_selection success Not a local patch

Commit Message

Masami Hiramatsu (Google) Jan. 25, 2022, 12:11 p.m. UTC
The fprobe is a wrapper API for ftrace function tracer.
Unlike kprobes, this probes only supports the function entry, but
it can probe multiple functions by one fprobe. The usage is almost
same as the kprobe, user will specify the function names by
fprobe::syms, the number of syms by fprobe::nentry,
and the user handler by fprobe::entry_handler.

struct fprobe fp = { 0 };
const char *targets[] = { "func1", "func2", "func3"};

fp.handler = user_handler;
fp.nentry = ARRAY_SIZE(targets);
fp.syms = targets;

ret = register_fprobe(&fp);

CAUTION: if user entry handler changes registers including
ip address, it will be applied when returns from the
entry handler. So user handler must recover it.

Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
---
 Changes in v4:
  - Fix a memory leak when symbol lookup failed.
  - Use ftrace location address instead of symbol address.
  - Convert the given symbol address to ftrace location automatically.
  - Rename fprobe::ftrace to fprobe::ops.
  - Update the Kconfig description.
---
 include/linux/fprobe.h |   80 ++++++++++++++++++++++++++++
 kernel/trace/Kconfig   |   12 ++++
 kernel/trace/Makefile  |    1 
 kernel/trace/fprobe.c  |  135 ++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 228 insertions(+)
 create mode 100644 include/linux/fprobe.h
 create mode 100644 kernel/trace/fprobe.c

Comments

Steven Rostedt Jan. 25, 2022, 4:21 p.m. UTC | #1
On Tue, 25 Jan 2022 21:11:57 +0900
Masami Hiramatsu <mhiramat@kernel.org> wrote:

> The fprobe is a wrapper API for ftrace function tracer.
> Unlike kprobes, this probes only supports the function entry, but
> it can probe multiple functions by one fprobe. The usage is almost
> same as the kprobe, user will specify the function names by
> fprobe::syms, the number of syms by fprobe::nentry,
> and the user handler by fprobe::entry_handler.
> 
> struct fprobe fp = { 0 };
> const char *targets[] = { "func1", "func2", "func3"};
> 
> fp.handler = user_handler;
> fp.nentry = ARRAY_SIZE(targets);
> fp.syms = targets;
> 
> ret = register_fprobe(&fp);
> 
> CAUTION: if user entry handler changes registers including
> ip address, it will be applied when returns from the
> entry handler. So user handler must recover it.

Can you rephrase the above, I'm not sure what you mean by it.

> 
> Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
> ---
>  Changes in v4:
>   - Fix a memory leak when symbol lookup failed.
>   - Use ftrace location address instead of symbol address.
>   - Convert the given symbol address to ftrace location automatically.
>   - Rename fprobe::ftrace to fprobe::ops.
>   - Update the Kconfig description.
> ---
>  include/linux/fprobe.h |   80 ++++++++++++++++++++++++++++
>  kernel/trace/Kconfig   |   12 ++++
>  kernel/trace/Makefile  |    1 
>  kernel/trace/fprobe.c  |  135 ++++++++++++++++++++++++++++++++++++++++++++++++
>  4 files changed, 228 insertions(+)
>  create mode 100644 include/linux/fprobe.h
>  create mode 100644 kernel/trace/fprobe.c
> 
> diff --git a/include/linux/fprobe.h b/include/linux/fprobe.h
> new file mode 100644
> index 000000000000..f7de332b08c2
> --- /dev/null
> +++ b/include/linux/fprobe.h
> @@ -0,0 +1,80 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/* Simple ftrace probe wrapper */
> +#ifndef _LINUX_FPROBE_H
> +#define _LINUX_FPROBE_H
> +
> +#include <linux/compiler.h>
> +#include <linux/ftrace.h>
> +
> +/**
> + * struct fprobe - ftrace based probe.
> + * @syms: The array of symbols to probe.
> + * @addrs: The array of ftrace address of the symbols.
> + * @nentry: The number of entries of @syms or @addrs.
> + * @ops: The ftrace_ops.
> + * @nmissed: The counter for missing events.
> + * @flags: The status flag.
> + * @entry_handler: The callback function for function entry.
> + *
> + * User must set either @syms or @addrs, but not both. If user sets
> + * only @syms, the @addrs are generated when registering the fprobe.
> + * That auto-generated @addrs will be freed when unregistering.
> + */
> +struct fprobe {
> +	const char		**syms;
> +	unsigned long		*addrs;
> +	unsigned int		nentry;
> +
> +	struct ftrace_ops	ops;
> +	unsigned long		nmissed;
> +	unsigned int		flags;
> +	void (*entry_handler)(struct fprobe *fp, unsigned long entry_ip, struct pt_regs *regs);
> +};
> +
> +#define FPROBE_FL_DISABLED	1
> +
> +static inline bool fprobe_disabled(struct fprobe *fp)
> +{
> +	return (fp) ? fp->flags & FPROBE_FL_DISABLED : false;
> +}
> +
> +#ifdef CONFIG_FPROBE
> +int register_fprobe(struct fprobe *fp);
> +int unregister_fprobe(struct fprobe *fp);
> +#else
> +static inline int register_fprobe(struct fprobe *fp)
> +{
> +	return -EOPNOTSUPP;
> +}
> +static inline int unregister_fprobe(struct fprobe *fp)
> +{
> +	return -EOPNOTSUPP;
> +}
> +#endif
> +
> +/**
> + * disable_fprobe() - Disable fprobe
> + * @fp: The fprobe to be disabled.
> + *
> + * This will soft-disable @fp. Note that this doesn't remove the ftrace
> + * hooks from the function entry.
> + */
> +static inline void disable_fprobe(struct fprobe *fp)
> +{
> +	if (fp)
> +		fp->flags |= FPROBE_FL_DISABLED;
> +}
> +
> +/**
> + * enable_fprobe() - Enable fprobe
> + * @fp: The fprobe to be enabled.
> + *
> + * This will soft-enable @fp.
> + */
> +static inline void enable_fprobe(struct fprobe *fp)
> +{
> +	if (fp)
> +		fp->flags &= ~FPROBE_FL_DISABLED;
> +}
> +
> +#endif
> diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
> index 420ff4bc67fd..23483dd474b0 100644
> --- a/kernel/trace/Kconfig
> +++ b/kernel/trace/Kconfig
> @@ -223,6 +223,18 @@ config DYNAMIC_FTRACE_WITH_ARGS
>  	depends on DYNAMIC_FTRACE
>  	depends on HAVE_DYNAMIC_FTRACE_WITH_ARGS
>  
> +config FPROBE
> +	bool "Kernel Function Probe (fprobe)"
> +	depends on FUNCTION_TRACER
> +	depends on DYNAMIC_FTRACE_WITH_REGS
> +	default n
> +	help
> +	  This option enables kernel function probe (fprobe) based on ftrace,
> +	  which is similar to kprobes, but probes only for kernel function
> +	  entries and it can probe multiple functions by one fprobe.
> +
> +	  If unsure, say N.
> +
>  config FUNCTION_PROFILER
>  	bool "Kernel function profiler"
>  	depends on FUNCTION_TRACER
> diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
> index bedc5caceec7..79255f9de9a4 100644
> --- a/kernel/trace/Makefile
> +++ b/kernel/trace/Makefile
> @@ -97,6 +97,7 @@ obj-$(CONFIG_PROBE_EVENTS) += trace_probe.o
>  obj-$(CONFIG_UPROBE_EVENTS) += trace_uprobe.o
>  obj-$(CONFIG_BOOTTIME_TRACING) += trace_boot.o
>  obj-$(CONFIG_FTRACE_RECORD_RECURSION) += trace_recursion_record.o
> +obj-$(CONFIG_FPROBE) += fprobe.o
>  
>  obj-$(CONFIG_TRACEPOINT_BENCHMARK) += trace_benchmark.o
>  
> diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c
> new file mode 100644
> index 000000000000..748cc34765c1
> --- /dev/null
> +++ b/kernel/trace/fprobe.c
> @@ -0,0 +1,135 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * fprobe - Simple ftrace probe wrapper for function entry.
> + */
> +#define pr_fmt(fmt) "fprobe: " fmt
> +
> +#include <linux/fprobe.h>
> +#include <linux/kallsyms.h>
> +#include <linux/kprobes.h>
> +#include <linux/slab.h>
> +#include <linux/sort.h>
> +
> +static void fprobe_handler(unsigned long ip, unsigned long parent_ip,
> +			   struct ftrace_ops *ops, struct ftrace_regs *fregs)
> +{
> +	struct fprobe *fp;
> +	int bit;
> +
> +	fp = container_of(ops, struct fprobe, ops);
> +	if (fprobe_disabled(fp))
> +		return;
> +
> +	bit = ftrace_test_recursion_trylock(ip, parent_ip);
> +	if (bit < 0) {
> +		fp->nmissed++;
> +		return;
> +	}
> +
> +	if (fp->entry_handler)
> +		fp->entry_handler(fp, ip, ftrace_get_regs(fregs));
> +
> +	ftrace_test_recursion_unlock(bit);
> +}
> +NOKPROBE_SYMBOL(fprobe_handler);
> +
> +/* Convert ftrace location address from symbols */
> +static int convert_func_addresses(struct fprobe *fp)
> +{
> +	unsigned long addr, size;
> +	unsigned int i;
> +
> +	/* Convert symbols to symbol address */
> +	if (fp->syms) {
> +		fp->addrs = kcalloc(fp->nentry, sizeof(*fp->addrs), GFP_KERNEL);
> +		if (!fp->addrs)
> +			return -ENOMEM;
> +
> +		for (i = 0; i < fp->nentry; i++) {
> +			fp->addrs[i] = kallsyms_lookup_name(fp->syms[i]);
> +			if (!fp->addrs[i])	/* Maybe wrong symbol */
> +				goto error;
> +		}
> +	}

I wonder if we should just copy the addrs when fp->syms is not set, and
not have to worry about not freeing addrs (see below). This will make
things easier to maintain. Or better yet, have the syms and addrs passed
in, and then we assign it.

static int convert_func_addresses(struct fprobe *fp, const char **syms,
				  unsigned long *addrs)
{
	unsigned int i;

	fp->addrs = kcalloc(fp->nentry, sizeof(*fp->addrs), GFP_KERNEL);
	if (!fp->addrs)
		return -ENOMEM;

	if (syms) {
		for (i = 0; i < fp->nentry; i++) {
			fp->addrs[i] = kallsyms_lookup_name(fp->syms[i]);
			if (!fp->addrs[i])	/* Maybe wrong symbol */
				goto error;
		}
	} else {
		memcpy(fp->addrs, addrs, fp->nentry * sizeof(*addrs));
	}

> +
> +	/* Convert symbol address to ftrace location. */
> +	for (i = 0; i < fp->nentry; i++) {
> +		if (!kallsyms_lookup_size_offset(fp->addrs[i], &size, NULL))
> +			size = MCOUNT_INSN_SIZE;
> +		addr = ftrace_location_range(fp->addrs[i], fp->addrs[i] + size);
> +		if (!addr) /* No dynamic ftrace there. */
> +			goto error;
> +		fp->addrs[i] = addr;
> +	}
> +
> +	return 0;
> +
> +error:
> +	kfree(fp->addrs);

The above doesn't check if fp->syms was set, so if it wasn't we just freed
the addrs that was passed in. Again, I think these should be passed into
the register function as separate parameters and not via the fp handle.

> +	fp->addrs = NULL;
> +	return -ENOENT;
> +}
> +
> +/**
> + * register_fprobe() - Register fprobe to ftrace
> + * @fp: A fprobe data structure to be registered.
> + *
> + * This expects the user set @fp::entry_handler, @fp::syms or @fp:addrs,
> + * and @fp::nentry. If @fp::addrs are set, that will be updated to point
> + * the ftrace location. If @fp::addrs are NULL, this will generate it
> + * from @fp::syms.
> + * Note that you do not set both of @fp::addrs and @fp::syms.

Again, I think this should pass in the syms and addrs as parameters.

-- Steve

> + */
> +int register_fprobe(struct fprobe *fp)
> +{
> +	int ret;
> +
> +	if (!fp || !fp->nentry || (!fp->syms && !fp->addrs) ||
> +	    (fp->syms && fp->addrs))
> +		return -EINVAL;
> +
> +	ret = convert_func_addresses(fp);
> +	if (ret < 0)
> +		return ret;
> +
> +	fp->nmissed = 0;
> +	fp->ops.func = fprobe_handler;
> +	fp->ops.flags = FTRACE_OPS_FL_SAVE_REGS;
> +
> +	ret = ftrace_set_filter_ips(&fp->ops, fp->addrs, fp->nentry, 0, 0);
> +	if (!ret)
> +		ret = register_ftrace_function(&fp->ops);
> +
> +	if (ret < 0 && fp->syms) {
> +		kfree(fp->addrs);
> +		fp->addrs = NULL;
> +	}
> +
> +	return ret;
> +}
> +EXPORT_SYMBOL_GPL(register_fprobe);
> +
> +/**
> + * unregister_fprobe() - Unregister fprobe from ftrace
> + * @fp: A fprobe data structure to be unregistered.
> + *
> + * Unregister fprobe (and remove ftrace hooks from the function entries).
> + * If the @fp::addrs are generated by register_fprobe(), it will be removed
> + * automatically.
> + */
> +int unregister_fprobe(struct fprobe *fp)
> +{
> +	int ret;
> +
> +	if (!fp || !fp->nentry || !fp->addrs)
> +		return -EINVAL;
> +
> +	ret = unregister_ftrace_function(&fp->ops);
> +
> +	if (!ret && fp->syms) {
> +		kfree(fp->addrs);
> +		fp->addrs = NULL;
> +	}
> +	return ret;
> +}
> +EXPORT_SYMBOL_GPL(unregister_fprobe);
Jiri Olsa Jan. 25, 2022, 4:41 p.m. UTC | #2
On Tue, Jan 25, 2022 at 09:11:57PM +0900, Masami Hiramatsu wrote:

SNIP

> +
> +/* Convert ftrace location address from symbols */
> +static int convert_func_addresses(struct fprobe *fp)
> +{
> +	unsigned long addr, size;
> +	unsigned int i;
> +
> +	/* Convert symbols to symbol address */
> +	if (fp->syms) {
> +		fp->addrs = kcalloc(fp->nentry, sizeof(*fp->addrs), GFP_KERNEL);
> +		if (!fp->addrs)
> +			return -ENOMEM;
> +
> +		for (i = 0; i < fp->nentry; i++) {
> +			fp->addrs[i] = kallsyms_lookup_name(fp->syms[i]);
> +			if (!fp->addrs[i])	/* Maybe wrong symbol */
> +				goto error;
> +		}
> +	}
> +
> +	/* Convert symbol address to ftrace location. */
> +	for (i = 0; i < fp->nentry; i++) {
> +		if (!kallsyms_lookup_size_offset(fp->addrs[i], &size, NULL))
> +			size = MCOUNT_INSN_SIZE;
> +		addr = ftrace_location_range(fp->addrs[i], fp->addrs[i] + size);

you need to substract 1 from 'end' in here, as explained in
__within_notrace_func comment:

        /*
         * Since ftrace_location_range() does inclusive range check, we need
         * to subtract 1 byte from the end address.
         */

like in the patch below

also this convert is for archs where address from kallsyms does not match
the real attach addresss, like for arm you mentioned earlier, right?

could we have that arch specific, so we don't have extra heavy search
loop for archs that do not need it?

thanks,
jirka


---
diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c
index 4d089dda89c2..7970418820e7 100644
--- a/kernel/trace/fprobe.c
+++ b/kernel/trace/fprobe.c
@@ -91,7 +91,7 @@ static int convert_func_addresses(struct fprobe *fp)
 	for (i = 0; i < fp->nentry; i++) {
 		if (!kallsyms_lookup_size_offset(fp->addrs[i], &size, NULL))
 			size = MCOUNT_INSN_SIZE;
-		addr = ftrace_location_range(fp->addrs[i], fp->addrs[i] + size);
+		addr = ftrace_location_range(fp->addrs[i], fp->addrs[i] + size - 1);
 		if (!addr) /* No dynamic ftrace there. */
 			goto error;
 		fp->addrs[i] = addr;
Jiri Olsa Jan. 25, 2022, 6:11 p.m. UTC | #3
On Tue, Jan 25, 2022 at 05:41:24PM +0100, Jiri Olsa wrote:
> On Tue, Jan 25, 2022 at 09:11:57PM +0900, Masami Hiramatsu wrote:
> 
> SNIP
> 
> > +
> > +/* Convert ftrace location address from symbols */
> > +static int convert_func_addresses(struct fprobe *fp)
> > +{
> > +	unsigned long addr, size;
> > +	unsigned int i;
> > +
> > +	/* Convert symbols to symbol address */
> > +	if (fp->syms) {
> > +		fp->addrs = kcalloc(fp->nentry, sizeof(*fp->addrs), GFP_KERNEL);
> > +		if (!fp->addrs)
> > +			return -ENOMEM;
> > +
> > +		for (i = 0; i < fp->nentry; i++) {
> > +			fp->addrs[i] = kallsyms_lookup_name(fp->syms[i]);
> > +			if (!fp->addrs[i])	/* Maybe wrong symbol */
> > +				goto error;
> > +		}
> > +	}
> > +
> > +	/* Convert symbol address to ftrace location. */
> > +	for (i = 0; i < fp->nentry; i++) {
> > +		if (!kallsyms_lookup_size_offset(fp->addrs[i], &size, NULL))
> > +			size = MCOUNT_INSN_SIZE;
> > +		addr = ftrace_location_range(fp->addrs[i], fp->addrs[i] + size);
> 
> you need to substract 1 from 'end' in here, as explained in
> __within_notrace_func comment:
> 
>         /*
>          * Since ftrace_location_range() does inclusive range check, we need
>          * to subtract 1 byte from the end address.
>          */
> 
> like in the patch below
> 
> also this convert is for archs where address from kallsyms does not match
> the real attach addresss, like for arm you mentioned earlier, right?
> 
> could we have that arch specific, so we don't have extra heavy search
> loop for archs that do not need it?

one more question..

I'm adding support for user to pass function symbols to bpf fprobe link
and I thought I'd pass symbols array to register_fprobe, but I'd need to
copy the whole array of strings from user space first, which could take
lot of memory considering attachment of 10k+ functions

so I'm thinking better way is to resolve symbols already in bpf fprobe
link code and pass just addresses to register_fprobe

I assume you want to keep symbol interface, right? could we have some
flag ensuring the conversion code is skipped, so we don't go through
it twice?

in any case I need addresses before I call register_fprobe, because
of the bpf cookies setup

thanks,
jirka
Masami Hiramatsu (Google) Jan. 26, 2022, 2:50 a.m. UTC | #4
On Tue, 25 Jan 2022 19:11:52 +0100
Jiri Olsa <jolsa@redhat.com> wrote:

> On Tue, Jan 25, 2022 at 05:41:24PM +0100, Jiri Olsa wrote:
> > On Tue, Jan 25, 2022 at 09:11:57PM +0900, Masami Hiramatsu wrote:
> > 
> > SNIP
> > 
> > > +
> > > +/* Convert ftrace location address from symbols */
> > > +static int convert_func_addresses(struct fprobe *fp)
> > > +{
> > > +	unsigned long addr, size;
> > > +	unsigned int i;
> > > +
> > > +	/* Convert symbols to symbol address */
> > > +	if (fp->syms) {
> > > +		fp->addrs = kcalloc(fp->nentry, sizeof(*fp->addrs), GFP_KERNEL);
> > > +		if (!fp->addrs)
> > > +			return -ENOMEM;
> > > +
> > > +		for (i = 0; i < fp->nentry; i++) {
> > > +			fp->addrs[i] = kallsyms_lookup_name(fp->syms[i]);
> > > +			if (!fp->addrs[i])	/* Maybe wrong symbol */
> > > +				goto error;
> > > +		}
> > > +	}
> > > +
> > > +	/* Convert symbol address to ftrace location. */
> > > +	for (i = 0; i < fp->nentry; i++) {
> > > +		if (!kallsyms_lookup_size_offset(fp->addrs[i], &size, NULL))
> > > +			size = MCOUNT_INSN_SIZE;
> > > +		addr = ftrace_location_range(fp->addrs[i], fp->addrs[i] + size);
> > 
> > you need to substract 1 from 'end' in here, as explained in
> > __within_notrace_func comment:
> > 
> >         /*
> >          * Since ftrace_location_range() does inclusive range check, we need
> >          * to subtract 1 byte from the end address.
> >          */
> > 
> > like in the patch below

Good catch, I missed that point.

> > also this convert is for archs where address from kallsyms does not match
> > the real attach addresss, like for arm you mentioned earlier, right?

Yes, that's right.

> > 
> > could we have that arch specific, so we don't have extra heavy search
> > loop for archs that do not need it?

Hmm, is that so heavy? Even though, this kind of convertion is better
to be implemented in the arch-specific ftrace. They may know the
best way to do, because the offset is fixed for each arch.

> 
> one more question..
> 
> I'm adding support for user to pass function symbols to bpf fprobe link
> and I thought I'd pass symbols array to register_fprobe, but I'd need to
> copy the whole array of strings from user space first, which could take
> lot of memory considering attachment of 10k+ functions
> 
> so I'm thinking better way is to resolve symbols already in bpf fprobe
> link code and pass just addresses to register_fprobe

That is OK. Fprobe accepts either ::syms or ::addrs.

> 
> I assume you want to keep symbol interface, right? could we have some
> flag ensuring the conversion code is skipped, so we don't go through
> it twice?

Yeah, we still have many unused bits in fprobe::flags. :)

Thank you,

> in any case I need addresses before I call register_fprobe, because
> of the bpf cookies setup
> 
> thanks,
> jirka
>
Masami Hiramatsu (Google) Jan. 26, 2022, 9:06 a.m. UTC | #5
On Tue, 25 Jan 2022 11:21:23 -0500
Steven Rostedt <rostedt@goodmis.org> wrote:

> On Tue, 25 Jan 2022 21:11:57 +0900
> Masami Hiramatsu <mhiramat@kernel.org> wrote:
> 
> > The fprobe is a wrapper API for ftrace function tracer.
> > Unlike kprobes, this probes only supports the function entry, but
> > it can probe multiple functions by one fprobe. The usage is almost
> > same as the kprobe, user will specify the function names by
> > fprobe::syms, the number of syms by fprobe::nentry,
> > and the user handler by fprobe::entry_handler.
> > 
> > struct fprobe fp = { 0 };
> > const char *targets[] = { "func1", "func2", "func3"};
> > 
> > fp.handler = user_handler;
> > fp.nentry = ARRAY_SIZE(targets);
> > fp.syms = targets;
> > 
> > ret = register_fprobe(&fp);
> > 
> > CAUTION: if user entry handler changes registers including
> > ip address, it will be applied when returns from the
> > entry handler. So user handler must recover it.
> 
> Can you rephrase the above, I'm not sure what you mean by it.

OK, but I think this should be written in the document.
I meant entry_handler can change the regs, but that will change
the execution path. So for some reason if it needs to change the
registers, those should be recovered in the same entry_handler.

[SNIP]
> > +/* Convert ftrace location address from symbols */
> > +static int convert_func_addresses(struct fprobe *fp)
> > +{
> > +	unsigned long addr, size;
> > +	unsigned int i;
> > +
> > +	/* Convert symbols to symbol address */
> > +	if (fp->syms) {
> > +		fp->addrs = kcalloc(fp->nentry, sizeof(*fp->addrs), GFP_KERNEL);
> > +		if (!fp->addrs)
> > +			return -ENOMEM;
> > +
> > +		for (i = 0; i < fp->nentry; i++) {
> > +			fp->addrs[i] = kallsyms_lookup_name(fp->syms[i]);
> > +			if (!fp->addrs[i])	/* Maybe wrong symbol */
> > +				goto error;
> > +		}
> > +	}
> 
> I wonder if we should just copy the addrs when fp->syms is not set, and
> not have to worry about not freeing addrs (see below). This will make
> things easier to maintain. Or better yet, have the syms and addrs passed
> in, and then we assign it.
> 
> static int convert_func_addresses(struct fprobe *fp, const char **syms,
> 				  unsigned long *addrs)
> {
> 	unsigned int i;
> 
> 	fp->addrs = kcalloc(fp->nentry, sizeof(*fp->addrs), GFP_KERNEL);
> 	if (!fp->addrs)
> 		return -ENOMEM;
> 
> 	if (syms) {
> 		for (i = 0; i < fp->nentry; i++) {
> 			fp->addrs[i] = kallsyms_lookup_name(fp->syms[i]);
> 			if (!fp->addrs[i])	/* Maybe wrong symbol */
> 				goto error;
> 		}
> 	} else {
> 		memcpy(fp->addrs, addrs, fp->nentry * sizeof(*addrs));
> 	}

Actually, since fprobe doesn't touch the addrs and syms except for the
registering time, instead of changing the fp->addrs, I would like
to make a temporary address array just for ftrace_filter_ips(). Then
we don't need to free it later.

> 
> > +
> > +	/* Convert symbol address to ftrace location. */
> > +	for (i = 0; i < fp->nentry; i++) {
> > +		if (!kallsyms_lookup_size_offset(fp->addrs[i], &size, NULL))
> > +			size = MCOUNT_INSN_SIZE;
> > +		addr = ftrace_location_range(fp->addrs[i], fp->addrs[i] + size);
> > +		if (!addr) /* No dynamic ftrace there. */
> > +			goto error;
> > +		fp->addrs[i] = addr;
> > +	}
> > +
> > +	return 0;
> > +
> > +error:
> > +	kfree(fp->addrs);
> 
> The above doesn't check if fp->syms was set, so if it wasn't we just freed
> the addrs that was passed in. Again, I think these should be passed into
> the register function as separate parameters and not via the fp handle.

Agreed. I also would like to remove those params from struct fprobe.

> 
> > +	fp->addrs = NULL;
> > +	return -ENOENT;
> > +}
> > +
> > +/**
> > + * register_fprobe() - Register fprobe to ftrace
> > + * @fp: A fprobe data structure to be registered.
> > + *
> > + * This expects the user set @fp::entry_handler, @fp::syms or @fp:addrs,
> > + * and @fp::nentry. If @fp::addrs are set, that will be updated to point
> > + * the ftrace location. If @fp::addrs are NULL, this will generate it
> > + * from @fp::syms.
> > + * Note that you do not set both of @fp::addrs and @fp::syms.
> 
> Again, I think this should pass in the syms and addrs as parameters.

That's good to me :)

Thank you,

> 
> -- Steve
> 
> > + */
> > +int register_fprobe(struct fprobe *fp)
> > +{
> > +	int ret;
> > +
> > +	if (!fp || !fp->nentry || (!fp->syms && !fp->addrs) ||
> > +	    (fp->syms && fp->addrs))
> > +		return -EINVAL;
> > +
> > +	ret = convert_func_addresses(fp);
> > +	if (ret < 0)
> > +		return ret;
> > +
> > +	fp->nmissed = 0;
> > +	fp->ops.func = fprobe_handler;
> > +	fp->ops.flags = FTRACE_OPS_FL_SAVE_REGS;
> > +
> > +	ret = ftrace_set_filter_ips(&fp->ops, fp->addrs, fp->nentry, 0, 0);
> > +	if (!ret)
> > +		ret = register_ftrace_function(&fp->ops);
> > +
> > +	if (ret < 0 && fp->syms) {
> > +		kfree(fp->addrs);
> > +		fp->addrs = NULL;
> > +	}
> > +
> > +	return ret;
> > +}
> > +EXPORT_SYMBOL_GPL(register_fprobe);
> > +
> > +/**
> > + * unregister_fprobe() - Unregister fprobe from ftrace
> > + * @fp: A fprobe data structure to be unregistered.
> > + *
> > + * Unregister fprobe (and remove ftrace hooks from the function entries).
> > + * If the @fp::addrs are generated by register_fprobe(), it will be removed
> > + * automatically.
> > + */
> > +int unregister_fprobe(struct fprobe *fp)
> > +{
> > +	int ret;
> > +
> > +	if (!fp || !fp->nentry || !fp->addrs)
> > +		return -EINVAL;
> > +
> > +	ret = unregister_ftrace_function(&fp->ops);
> > +
> > +	if (!ret && fp->syms) {
> > +		kfree(fp->addrs);
> > +		fp->addrs = NULL;
> > +	}
> > +	return ret;
> > +}
> > +EXPORT_SYMBOL_GPL(unregister_fprobe);
>
Masami Hiramatsu (Google) Jan. 26, 2022, 3:59 p.m. UTC | #6
On Wed, 26 Jan 2022 11:50:22 +0900
Masami Hiramatsu <mhiramat@kernel.org> wrote:

> > one more question..
> > 
> > I'm adding support for user to pass function symbols to bpf fprobe link
> > and I thought I'd pass symbols array to register_fprobe, but I'd need to
> > copy the whole array of strings from user space first, which could take
> > lot of memory considering attachment of 10k+ functions
> > 
> > so I'm thinking better way is to resolve symbols already in bpf fprobe
> > link code and pass just addresses to register_fprobe
> 
> That is OK. Fprobe accepts either ::syms or ::addrs.
> 
> > 
> > I assume you want to keep symbol interface, right? could we have some
> > flag ensuring the conversion code is skipped, so we don't go through
> > it twice?
> 
> Yeah, we still have many unused bits in fprobe::flags. :)

Instead of that, according to Steve's comment, I would like to introduce
3 registration APIs.

int register_fprobe(struct fprobe *fp, const char *filter, const char *notrace);
int register_fprobe_ips(struct fprobe *fp, unsigned long *addrs, int num);
int register_fprobe_syms(struct fprobe *fp, const char **syms, int num);

The register_fprobe_ips() will not touch the @addrs. You have to set the
correct ftrace location address in the @addrs.

Thank you,
Jiri Olsa Jan. 26, 2022, 6:39 p.m. UTC | #7
On Thu, Jan 27, 2022 at 12:59:52AM +0900, Masami Hiramatsu wrote:
> On Wed, 26 Jan 2022 11:50:22 +0900
> Masami Hiramatsu <mhiramat@kernel.org> wrote:
> 
> > > one more question..
> > > 
> > > I'm adding support for user to pass function symbols to bpf fprobe link
> > > and I thought I'd pass symbols array to register_fprobe, but I'd need to
> > > copy the whole array of strings from user space first, which could take
> > > lot of memory considering attachment of 10k+ functions
> > > 
> > > so I'm thinking better way is to resolve symbols already in bpf fprobe
> > > link code and pass just addresses to register_fprobe
> > 
> > That is OK. Fprobe accepts either ::syms or ::addrs.
> > 
> > > 
> > > I assume you want to keep symbol interface, right? could we have some
> > > flag ensuring the conversion code is skipped, so we don't go through
> > > it twice?
> > 
> > Yeah, we still have many unused bits in fprobe::flags. :)
> 
> Instead of that, according to Steve's comment, I would like to introduce
> 3 registration APIs.
> 
> int register_fprobe(struct fprobe *fp, const char *filter, const char *notrace);
> int register_fprobe_ips(struct fprobe *fp, unsigned long *addrs, int num);
> int register_fprobe_syms(struct fprobe *fp, const char **syms, int num);
> 
> The register_fprobe_ips() will not touch the @addrs. You have to set the
> correct ftrace location address in the @addrs.

ok, sounds good

thanks,
jirka
diff mbox series

Patch

diff --git a/include/linux/fprobe.h b/include/linux/fprobe.h
new file mode 100644
index 000000000000..f7de332b08c2
--- /dev/null
+++ b/include/linux/fprobe.h
@@ -0,0 +1,80 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Simple ftrace probe wrapper */
+#ifndef _LINUX_FPROBE_H
+#define _LINUX_FPROBE_H
+
+#include <linux/compiler.h>
+#include <linux/ftrace.h>
+
+/**
+ * struct fprobe - ftrace based probe.
+ * @syms: The array of symbols to probe.
+ * @addrs: The array of ftrace address of the symbols.
+ * @nentry: The number of entries of @syms or @addrs.
+ * @ops: The ftrace_ops.
+ * @nmissed: The counter for missing events.
+ * @flags: The status flag.
+ * @entry_handler: The callback function for function entry.
+ *
+ * User must set either @syms or @addrs, but not both. If user sets
+ * only @syms, the @addrs are generated when registering the fprobe.
+ * That auto-generated @addrs will be freed when unregistering.
+ */
+struct fprobe {
+	const char		**syms;
+	unsigned long		*addrs;
+	unsigned int		nentry;
+
+	struct ftrace_ops	ops;
+	unsigned long		nmissed;
+	unsigned int		flags;
+	void (*entry_handler)(struct fprobe *fp, unsigned long entry_ip, struct pt_regs *regs);
+};
+
+#define FPROBE_FL_DISABLED	1
+
+static inline bool fprobe_disabled(struct fprobe *fp)
+{
+	return (fp) ? fp->flags & FPROBE_FL_DISABLED : false;
+}
+
+#ifdef CONFIG_FPROBE
+int register_fprobe(struct fprobe *fp);
+int unregister_fprobe(struct fprobe *fp);
+#else
+static inline int register_fprobe(struct fprobe *fp)
+{
+	return -EOPNOTSUPP;
+}
+static inline int unregister_fprobe(struct fprobe *fp)
+{
+	return -EOPNOTSUPP;
+}
+#endif
+
+/**
+ * disable_fprobe() - Disable fprobe
+ * @fp: The fprobe to be disabled.
+ *
+ * This will soft-disable @fp. Note that this doesn't remove the ftrace
+ * hooks from the function entry.
+ */
+static inline void disable_fprobe(struct fprobe *fp)
+{
+	if (fp)
+		fp->flags |= FPROBE_FL_DISABLED;
+}
+
+/**
+ * enable_fprobe() - Enable fprobe
+ * @fp: The fprobe to be enabled.
+ *
+ * This will soft-enable @fp.
+ */
+static inline void enable_fprobe(struct fprobe *fp)
+{
+	if (fp)
+		fp->flags &= ~FPROBE_FL_DISABLED;
+}
+
+#endif
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 420ff4bc67fd..23483dd474b0 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -223,6 +223,18 @@  config DYNAMIC_FTRACE_WITH_ARGS
 	depends on DYNAMIC_FTRACE
 	depends on HAVE_DYNAMIC_FTRACE_WITH_ARGS
 
+config FPROBE
+	bool "Kernel Function Probe (fprobe)"
+	depends on FUNCTION_TRACER
+	depends on DYNAMIC_FTRACE_WITH_REGS
+	default n
+	help
+	  This option enables kernel function probe (fprobe) based on ftrace,
+	  which is similar to kprobes, but probes only for kernel function
+	  entries and it can probe multiple functions by one fprobe.
+
+	  If unsure, say N.
+
 config FUNCTION_PROFILER
 	bool "Kernel function profiler"
 	depends on FUNCTION_TRACER
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index bedc5caceec7..79255f9de9a4 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -97,6 +97,7 @@  obj-$(CONFIG_PROBE_EVENTS) += trace_probe.o
 obj-$(CONFIG_UPROBE_EVENTS) += trace_uprobe.o
 obj-$(CONFIG_BOOTTIME_TRACING) += trace_boot.o
 obj-$(CONFIG_FTRACE_RECORD_RECURSION) += trace_recursion_record.o
+obj-$(CONFIG_FPROBE) += fprobe.o
 
 obj-$(CONFIG_TRACEPOINT_BENCHMARK) += trace_benchmark.o
 
diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c
new file mode 100644
index 000000000000..748cc34765c1
--- /dev/null
+++ b/kernel/trace/fprobe.c
@@ -0,0 +1,135 @@ 
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * fprobe - Simple ftrace probe wrapper for function entry.
+ */
+#define pr_fmt(fmt) "fprobe: " fmt
+
+#include <linux/fprobe.h>
+#include <linux/kallsyms.h>
+#include <linux/kprobes.h>
+#include <linux/slab.h>
+#include <linux/sort.h>
+
+static void fprobe_handler(unsigned long ip, unsigned long parent_ip,
+			   struct ftrace_ops *ops, struct ftrace_regs *fregs)
+{
+	struct fprobe *fp;
+	int bit;
+
+	fp = container_of(ops, struct fprobe, ops);
+	if (fprobe_disabled(fp))
+		return;
+
+	bit = ftrace_test_recursion_trylock(ip, parent_ip);
+	if (bit < 0) {
+		fp->nmissed++;
+		return;
+	}
+
+	if (fp->entry_handler)
+		fp->entry_handler(fp, ip, ftrace_get_regs(fregs));
+
+	ftrace_test_recursion_unlock(bit);
+}
+NOKPROBE_SYMBOL(fprobe_handler);
+
+/* Convert ftrace location address from symbols */
+static int convert_func_addresses(struct fprobe *fp)
+{
+	unsigned long addr, size;
+	unsigned int i;
+
+	/* Convert symbols to symbol address */
+	if (fp->syms) {
+		fp->addrs = kcalloc(fp->nentry, sizeof(*fp->addrs), GFP_KERNEL);
+		if (!fp->addrs)
+			return -ENOMEM;
+
+		for (i = 0; i < fp->nentry; i++) {
+			fp->addrs[i] = kallsyms_lookup_name(fp->syms[i]);
+			if (!fp->addrs[i])	/* Maybe wrong symbol */
+				goto error;
+		}
+	}
+
+	/* Convert symbol address to ftrace location. */
+	for (i = 0; i < fp->nentry; i++) {
+		if (!kallsyms_lookup_size_offset(fp->addrs[i], &size, NULL))
+			size = MCOUNT_INSN_SIZE;
+		addr = ftrace_location_range(fp->addrs[i], fp->addrs[i] + size);
+		if (!addr) /* No dynamic ftrace there. */
+			goto error;
+		fp->addrs[i] = addr;
+	}
+
+	return 0;
+
+error:
+	kfree(fp->addrs);
+	fp->addrs = NULL;
+	return -ENOENT;
+}
+
+/**
+ * register_fprobe() - Register fprobe to ftrace
+ * @fp: A fprobe data structure to be registered.
+ *
+ * This expects the user set @fp::entry_handler, @fp::syms or @fp:addrs,
+ * and @fp::nentry. If @fp::addrs are set, that will be updated to point
+ * the ftrace location. If @fp::addrs are NULL, this will generate it
+ * from @fp::syms.
+ * Note that you do not set both of @fp::addrs and @fp::syms.
+ */
+int register_fprobe(struct fprobe *fp)
+{
+	int ret;
+
+	if (!fp || !fp->nentry || (!fp->syms && !fp->addrs) ||
+	    (fp->syms && fp->addrs))
+		return -EINVAL;
+
+	ret = convert_func_addresses(fp);
+	if (ret < 0)
+		return ret;
+
+	fp->nmissed = 0;
+	fp->ops.func = fprobe_handler;
+	fp->ops.flags = FTRACE_OPS_FL_SAVE_REGS;
+
+	ret = ftrace_set_filter_ips(&fp->ops, fp->addrs, fp->nentry, 0, 0);
+	if (!ret)
+		ret = register_ftrace_function(&fp->ops);
+
+	if (ret < 0 && fp->syms) {
+		kfree(fp->addrs);
+		fp->addrs = NULL;
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(register_fprobe);
+
+/**
+ * unregister_fprobe() - Unregister fprobe from ftrace
+ * @fp: A fprobe data structure to be unregistered.
+ *
+ * Unregister fprobe (and remove ftrace hooks from the function entries).
+ * If the @fp::addrs are generated by register_fprobe(), it will be removed
+ * automatically.
+ */
+int unregister_fprobe(struct fprobe *fp)
+{
+	int ret;
+
+	if (!fp || !fp->nentry || !fp->addrs)
+		return -EINVAL;
+
+	ret = unregister_ftrace_function(&fp->ops);
+
+	if (!ret && fp->syms) {
+		kfree(fp->addrs);
+		fp->addrs = NULL;
+	}
+	return ret;
+}
+EXPORT_SYMBOL_GPL(unregister_fprobe);