
[10/20] function_graph: Have the instances use their own ftrace_ops for filtering

Message ID 20240525023742.786834257@goodmis.org (mailing list archive)
State Superseded
Series [01/20] function_graph: Convert ret_stack to a series of longs

Commit Message

Steven Rostedt May 25, 2024, 2:37 a.m. UTC
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>

Allow instances to have their own ftrace_ops as part of the fgraph_ops, which
makes the function_graph tracer filter on the set_ftrace_filter file of the
instance and not the top instance.

Note that this also requires updating ftrace_graph_func() to call the new
function_graph_enter_ops() instead of function_graph_enter(), so that it
avoids pushing onto the shadow stack multiple times for the same function.
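
As an illustration only (not taken from this patch), a per-instance user of
the embedded ops could look roughly like the sketch below; my_entry(),
my_return() and the filter pattern are hypothetical:

#include <linux/ftrace.h>

static int my_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops)
{
	return 1;	/* trace this function */
}

static void my_return(struct ftrace_graph_ret *trace, struct fgraph_ops *gops)
{
}

static struct fgraph_ops my_gops = {
	.entryfunc	= my_entry,
	.retfunc	= my_return,
};

static int __init my_graph_init(void)
{
	/* The filter is set on the fgraph_ops' own embedded ftrace_ops */
	ftrace_set_filter(&my_gops.ops, "kmem_cache_*", strlen("kmem_cache_*"), 1);
	return register_ftrace_graph(&my_gops);
}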

Co-developed with Masami Hiramatsu:
Link: https://lore.kernel.org/linux-trace-kernel/171509102088.162236.15758883237657317789.stgit@devnote2

Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 arch/arm64/kernel/ftrace.c           |  21 ++++-
 arch/loongarch/kernel/ftrace_dyn.c   |  15 ++-
 arch/powerpc/kernel/trace/ftrace.c   |   3 +-
 arch/riscv/kernel/ftrace.c           |  15 ++-
 arch/x86/kernel/ftrace.c             |  19 +++-
 include/linux/ftrace.h               |   6 ++
 kernel/trace/fgraph.c                | 131 ++++++++++++++++++++-------
 kernel/trace/ftrace.c                |   4 +-
 kernel/trace/trace.h                 |  16 ++--
 kernel/trace/trace_functions.c       |   2 +-
 kernel/trace/trace_functions_graph.c |   8 +-
 11 files changed, 190 insertions(+), 50 deletions(-)

Comments

Steven Rostedt May 31, 2024, 2:30 a.m. UTC | #1
On Fri, 24 May 2024 22:37:02 -0400
Steven Rostedt <rostedt@goodmis.org> wrote:

> From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
> 
> Allow for instances to have their own ftrace_ops part of the fgraph_ops
> that makes the funtion_graph tracer filter on the set_ftrace_filter file
> of the instance and not the top instance.
> 
> Note that this also requires to update ftrace_graph_func() to call new
> function_graph_enter_ops() instead of function_graph_enter() so that
> it avoid pushing on shadow stack multiple times on the same function.

So I found a major design flaw in this patch.

> 
> Co-developed with Masami Hiramatsu:
> Link: https://lore.kernel.org/linux-trace-kernel/171509102088.162236.15758883237657317789.stgit@devnote2
> 
> Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
> Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
> Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
> ---

> diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
> index 8da0e66ca22d..998558cb8f15 100644
> --- a/arch/x86/kernel/ftrace.c
> +++ b/arch/x86/kernel/ftrace.c
> @@ -648,9 +648,24 @@ void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
>  		       struct ftrace_ops *op, struct ftrace_regs *fregs)
>  {
>  	struct pt_regs *regs = &fregs->regs;
> -	unsigned long *stack = (unsigned long *)kernel_stack_pointer(regs);
> +	unsigned long *parent = (unsigned long *)kernel_stack_pointer(regs);
> +	struct fgraph_ops *gops = container_of(op, struct fgraph_ops, ops);
> +	int bit;
> +
> +	if (unlikely(ftrace_graph_is_dead()))
> +		return;
> +
> +	if (unlikely(atomic_read(&current->tracing_graph_pause)))
> +		return;
>  
> -	prepare_ftrace_return(ip, (unsigned long *)stack, 0);
> +	bit = ftrace_test_recursion_trylock(ip, *parent);
> +	if (bit < 0)
> +		return;
> +
> +	if (!function_graph_enter_ops(*parent, ip, 0, parent, gops))

So each registered graph ops has its own ftrace_ops which gets
registered with ftrace, which means this function gets called in a loop
(by the ftrace iterator function). We would therefore need code to
detect function_graph_enter_ops() being called multiple times for the
same function, and each fgraph_ops gets its own retstack frame on the
shadow stack.
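
A rough sketch of that loop (heavily simplified; this is only a stand-in for
ftrace's internal list walk, not the actual code):

static void ftrace_list_func_sketch(unsigned long ip, unsigned long parent_ip,
				    struct ftrace_ops *unused,
				    struct ftrace_regs *fregs)
{
	struct ftrace_ops *op;

	/* stand-in for do_for_each_ftrace_op(): walk every registered ops */
	for (op = ftrace_ops_list; op != &ftrace_list_end; op = op->next) {
		/*
		 * Each fgraph_ops registered its own ftrace_ops, so a function
		 * traced by several instances matches several times here ...
		 */
		if (ftrace_ops_test(op, ip, NULL))
			/*
			 * ... and ftrace_graph_func() runs once per match,
			 * pushing a separate shadow stack entry each time.
			 */
			op->func(ip, parent_ip, op, fregs);
	}
}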

I find this a waste of shadow stack resources, and it also complicates
the code by having to deal with tail calls and all that.

BUT! There's good news! I also thought about another way of handling
this. I have something working, but it requires a bit of rewriting of
the code. I should have something out in a day or two.

-- Steve


> +		*parent = (unsigned long)&return_to_handler;
> +
> +	ftrace_test_recursion_unlock(bit);
>  }
>  #endif
>
Masami Hiramatsu (Google) May 31, 2024, 3:12 a.m. UTC | #2
On Thu, 30 May 2024 22:30:57 -0400
Steven Rostedt <rostedt@goodmis.org> wrote:

> On Fri, 24 May 2024 22:37:02 -0400
> Steven Rostedt <rostedt@goodmis.org> wrote:
> 
> > From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
> > 
> > Allow for instances to have their own ftrace_ops part of the fgraph_ops
> > that makes the funtion_graph tracer filter on the set_ftrace_filter file
> > of the instance and not the top instance.
> > 
> > Note that this also requires to update ftrace_graph_func() to call new
> > function_graph_enter_ops() instead of function_graph_enter() so that
> > it avoid pushing on shadow stack multiple times on the same function.
> 
> So I found a major design flaw in this patch.
> 
> > 
> > Co-developed with Masami Hiramatsu:
> > Link: https://lore.kernel.org/linux-trace-kernel/171509102088.162236.15758883237657317789.stgit@devnote2
> > 
> > Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
> > Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
> > Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
> > ---
> 
> > diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
> > index 8da0e66ca22d..998558cb8f15 100644
> > --- a/arch/x86/kernel/ftrace.c
> > +++ b/arch/x86/kernel/ftrace.c
> > @@ -648,9 +648,24 @@ void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
> >  		       struct ftrace_ops *op, struct ftrace_regs *fregs)
> >  {
> >  	struct pt_regs *regs = &fregs->regs;
> > -	unsigned long *stack = (unsigned long *)kernel_stack_pointer(regs);
> > +	unsigned long *parent = (unsigned long *)kernel_stack_pointer(regs);
> > +	struct fgraph_ops *gops = container_of(op, struct fgraph_ops, ops);
> > +	int bit;
> > +
> > +	if (unlikely(ftrace_graph_is_dead()))
> > +		return;
> > +
> > +	if (unlikely(atomic_read(&current->tracing_graph_pause)))
> > +		return;
> >  
> > -	prepare_ftrace_return(ip, (unsigned long *)stack, 0);
> > +	bit = ftrace_test_recursion_trylock(ip, *parent);
> > +	if (bit < 0)
> > +		return;
> > +
> > +	if (!function_graph_enter_ops(*parent, ip, 0, parent, gops))
> 
> So each registered graph ops has its own ftrace_ops which gets
> registered with ftrace, so this function does get called in a loop (by
> the ftrace iterator function). This means that we would need that code
> to detect the function_graph_enter_ops() getting called multiple times
> for the same function. This means each fgraph_ops gits its own retstack
> on the shadow stack.

Ah, that was my concern, and the reason why I added the bitmap and
stack-reuse code in ftrace_push_return_trace().

> 
> I find this a waste of shadow stack resources, and also complicates the
> code with having to deal with tail calls and all that.
> 
> BUT! There's good news! I also thought about another way of handling
> this. I have something working, but requires a bit of rewriting the
> code. I should have something out in a day or two.

Hmm, I just wonder why you don't recover my bitmap check and stack-reuse
code. Is there any problem with it? (Too complicated?)

Thanks,

> 
> -- Steve
> 
> 
> > +		*parent = (unsigned long)&return_to_handler;
> > +
> > +	ftrace_test_recursion_unlock(bit);
> >  }
> >  #endif
> >  
>
Steven Rostedt May 31, 2024, 6:03 a.m. UTC | #3
On Fri, 31 May 2024 12:12:41 +0900
Masami Hiramatsu (Google) <mhiramat@kernel.org> wrote:

> On Thu, 30 May 2024 22:30:57 -0400
> Steven Rostedt <rostedt@goodmis.org> wrote:
> 
> > On Fri, 24 May 2024 22:37:02 -0400
> > Steven Rostedt <rostedt@goodmis.org> wrote:
> >   
> > > From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
> > > 
> > > Allow for instances to have their own ftrace_ops part of the fgraph_ops
> > > that makes the funtion_graph tracer filter on the set_ftrace_filter file
> > > of the instance and not the top instance.
> > > 
> > > Note that this also requires to update ftrace_graph_func() to call new
> > > function_graph_enter_ops() instead of function_graph_enter() so that
> > > it avoid pushing on shadow stack multiple times on the same function.  
> > 
> > So I found a major design flaw in this patch.
> >   
> > > 
> > > Co-developed with Masami Hiramatsu:
> > > Link: https://lore.kernel.org/linux-trace-kernel/171509102088.162236.15758883237657317789.stgit@devnote2
> > > 
> > > Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
> > > Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
> > > Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
> > > ---  
> >   
> > > diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
> > > index 8da0e66ca22d..998558cb8f15 100644
> > > --- a/arch/x86/kernel/ftrace.c
> > > +++ b/arch/x86/kernel/ftrace.c
> > > @@ -648,9 +648,24 @@ void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
> > >  		       struct ftrace_ops *op, struct ftrace_regs *fregs)
> > >  {
> > >  	struct pt_regs *regs = &fregs->regs;
> > > -	unsigned long *stack = (unsigned long *)kernel_stack_pointer(regs);
> > > +	unsigned long *parent = (unsigned long *)kernel_stack_pointer(regs);
> > > +	struct fgraph_ops *gops = container_of(op, struct fgraph_ops, ops);
> > > +	int bit;
> > > +
> > > +	if (unlikely(ftrace_graph_is_dead()))
> > > +		return;
> > > +
> > > +	if (unlikely(atomic_read(&current->tracing_graph_pause)))
> > > +		return;
> > >  
> > > -	prepare_ftrace_return(ip, (unsigned long *)stack, 0);
> > > +	bit = ftrace_test_recursion_trylock(ip, *parent);
> > > +	if (bit < 0)
> > > +		return;
> > > +
> > > +	if (!function_graph_enter_ops(*parent, ip, 0, parent, gops))  
> > 
> > So each registered graph ops has its own ftrace_ops which gets
> > registered with ftrace, so this function does get called in a loop (by
> > the ftrace iterator function). This means that we would need that code
> > to detect the function_graph_enter_ops() getting called multiple times
> > for the same function. This means each fgraph_ops gits its own retstack
> > on the shadow stack.  
> 
> Ah, that is my concern and the reason why I added bitmap and stack reuse
> code in the ftrace_push_return_trace().
> 
> > 
> > I find this a waste of shadow stack resources, and also complicates the
> > code with having to deal with tail calls and all that.
> > 
> > BUT! There's good news! I also thought about another way of handling
> > this. I have something working, but requires a bit of rewriting the
> > code. I should have something out in a day or two.  
> 
> Hmm, I just wonder why you don't reocver my bitmap check and stack
> reusing code. Are there any problem on it? (Too complicated?)
> 

I actually dislike the use of ftrace itself to do the loop. I'd rather
have fgraph be in control of it.

I've come up with a new "subops" assignment, where you can have one
ftrace_ops represent multiple sub ftrace_ops. Basically, each fgraph
ops can register its own ftrace_ops under a single graph_ops
ftrace_ops. The graph_ops will be used to decide what functions call
the callback, and then the callback does the multiplexing.
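
A minimal sketch of that demultiplexing callback (names are illustrative;
fgraph_call_entry() below is a hypothetical helper, not the actual code):

static void fgraph_manager_func(unsigned long ip, unsigned long parent_ip,
				struct ftrace_ops *op, struct ftrace_regs *fregs)
{
	struct ftrace_ops *subops;

	/* ftrace only sees the manager ops; fan out to its subops here */
	list_for_each_entry(subops, &op->subop_list, list) {
		/* only invoke the users whose own filter hash matches this ip */
		if (ftrace_ops_test(subops, ip, NULL))
			fgraph_call_entry(subops, ip, parent_ip, fregs);
	}
}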

This removes the need to touch the architecture code. It can also be
used by fprobes to handle the attachments to functions for several
different sets of callbacks.

I'll send out patches soon.

-- Steve
Masami Hiramatsu (Google) May 31, 2024, 2:50 p.m. UTC | #4
On Fri, 31 May 2024 02:03:46 -0400
Steven Rostedt <rostedt@goodmis.org> wrote:

> On Fri, 31 May 2024 12:12:41 +0900
> Masami Hiramatsu (Google) <mhiramat@kernel.org> wrote:
> 
> > On Thu, 30 May 2024 22:30:57 -0400
> > Steven Rostedt <rostedt@goodmis.org> wrote:
> > 
> > > On Fri, 24 May 2024 22:37:02 -0400
> > > Steven Rostedt <rostedt@goodmis.org> wrote:
> > >   
> > > > From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
> > > > 
> > > > Allow for instances to have their own ftrace_ops part of the fgraph_ops
> > > > that makes the funtion_graph tracer filter on the set_ftrace_filter file
> > > > of the instance and not the top instance.
> > > > 
> > > > Note that this also requires to update ftrace_graph_func() to call new
> > > > function_graph_enter_ops() instead of function_graph_enter() so that
> > > > it avoid pushing on shadow stack multiple times on the same function.  
> > > 
> > > So I found a major design flaw in this patch.
> > >   
> > > > 
> > > > Co-developed with Masami Hiramatsu:
> > > > Link: https://lore.kernel.org/linux-trace-kernel/171509102088.162236.15758883237657317789.stgit@devnote2
> > > > 
> > > > Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
> > > > Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
> > > > Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
> > > > ---  
> > >   
> > > > diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
> > > > index 8da0e66ca22d..998558cb8f15 100644
> > > > --- a/arch/x86/kernel/ftrace.c
> > > > +++ b/arch/x86/kernel/ftrace.c
> > > > @@ -648,9 +648,24 @@ void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
> > > >  		       struct ftrace_ops *op, struct ftrace_regs *fregs)
> > > >  {
> > > >  	struct pt_regs *regs = &fregs->regs;
> > > > -	unsigned long *stack = (unsigned long *)kernel_stack_pointer(regs);
> > > > +	unsigned long *parent = (unsigned long *)kernel_stack_pointer(regs);
> > > > +	struct fgraph_ops *gops = container_of(op, struct fgraph_ops, ops);
> > > > +	int bit;
> > > > +
> > > > +	if (unlikely(ftrace_graph_is_dead()))
> > > > +		return;
> > > > +
> > > > +	if (unlikely(atomic_read(&current->tracing_graph_pause)))
> > > > +		return;
> > > >  
> > > > -	prepare_ftrace_return(ip, (unsigned long *)stack, 0);
> > > > +	bit = ftrace_test_recursion_trylock(ip, *parent);
> > > > +	if (bit < 0)
> > > > +		return;
> > > > +
> > > > +	if (!function_graph_enter_ops(*parent, ip, 0, parent, gops))  
> > > 
> > > So each registered graph ops has its own ftrace_ops which gets
> > > registered with ftrace, so this function does get called in a loop (by
> > > the ftrace iterator function). This means that we would need that code
> > > to detect the function_graph_enter_ops() getting called multiple times
> > > for the same function. This means each fgraph_ops gits its own retstack
> > > on the shadow stack.  
> > 
> > Ah, that is my concern and the reason why I added bitmap and stack reuse
> > code in the ftrace_push_return_trace().
> > 
> > > 
> > > I find this a waste of shadow stack resources, and also complicates the
> > > code with having to deal with tail calls and all that.
> > > 
> > > BUT! There's good news! I also thought about another way of handling
> > > this. I have something working, but requires a bit of rewriting the
> > > code. I should have something out in a day or two.  
> > 
> > Hmm, I just wonder why you don't reocver my bitmap check and stack
> > reusing code. Are there any problem on it? (Too complicated?)
> > 
> 
> I actually dislike the use of ftrace itself to do the loop. I rather
> have fgraph be in control of it.

(actually, I agree with you; looping in ftrace may cause trouble)

> 
> I've come up with a new "subops" assignment, where you can have one
> ftrace_ops represent multiple sub ftrace_ops. Basically, each fgraph
> ops can register its own ftrace_ops under a single graph_ops
> ftrace_ops. The graph_ops will be used to decide what functions call
> the callback, and then the callback does the multiplexing.

So is it similar to fprobe/kprobe: use a shared single ftrace_ops,
but keep each fgraph's own hash table?

> This removes the need to touch the architecture code. It can also be
> used by fprobes to handle the attachments to functions for several
> different sets of callbacks.
> 
> I'll send out patches soon.

OK, I'll wait for that.

Thank you!

> 
> -- Steve
>
Steven Rostedt May 31, 2024, 10:49 p.m. UTC | #5
On Fri, 31 May 2024 23:50:23 +0900
Masami Hiramatsu (Google) <mhiramat@kernel.org> wrote:

> So is it similar to the fprobe/kprobe, use shared signle ftrace_ops,
> but keep each fgraph has own hash table?

Sort of.

I created helper functions in ftrace that let you have a "manager
ftrace_ops" that is the one assigned to ftrace (with the function that
will demultiplex), and then you can have "subops" that are assigned per
user. Here's a glimpse of the code:

/**
 * ftrace_startup_subops - enable tracing for subops of an ops
 * @ops: Manager ops (used to pick all the functions of its subops)
 * @subops: A new ops to add to @ops
 * @command: Extra commands to use to enable tracing
 *
 * The @ops is a manager @ops that has the filter that includes all the functions
 * that its list of subops are tracing. Adding a new @subops will add the
 * functions of @subops to @ops.
 */
int ftrace_startup_subops(struct ftrace_ops *ops, struct ftrace_ops *subops, int command)
{
	struct ftrace_hash *filter_hash;
	struct ftrace_hash *notrace_hash;
	struct ftrace_hash *save_filter_hash;
	struct ftrace_hash *save_notrace_hash;
	int size_bits;
	int ret;

	if (unlikely(ftrace_disabled))
		return -ENODEV;

	ftrace_ops_init(ops);
	ftrace_ops_init(subops);

	/* Make everything canonical (Just in case!) */
	if (!ops->func_hash->filter_hash)
		ops->func_hash->filter_hash = EMPTY_HASH;
	if (!ops->func_hash->notrace_hash)
		ops->func_hash->notrace_hash = EMPTY_HASH;
	if (!subops->func_hash->filter_hash)
		subops->func_hash->filter_hash = EMPTY_HASH;
	if (!subops->func_hash->notrace_hash)
		subops->func_hash->notrace_hash = EMPTY_HASH;

	/* For the first subops to ops just enable it normally */
	if (list_empty(&ops->subop_list)) {
		/* Just use the subops hashes */
		filter_hash = copy_hash(subops->func_hash->filter_hash);
		notrace_hash = copy_hash(subops->func_hash->notrace_hash);
		if (!filter_hash || !notrace_hash) {
			free_ftrace_hash(filter_hash);
			free_ftrace_hash(notrace_hash);
			return -ENOMEM;
		}

		save_filter_hash = ops->func_hash->filter_hash;
		save_notrace_hash = ops->func_hash->notrace_hash;

		ops->func_hash->filter_hash = filter_hash;
		ops->func_hash->notrace_hash = notrace_hash;
		list_add(&subops->list, &ops->subop_list);
		ret = ftrace_startup(ops, command);
		if (ret < 0) {
			list_del(&subops->list);
			ops->func_hash->filter_hash = save_filter_hash;
			ops->func_hash->notrace_hash = save_notrace_hash;
			free_ftrace_hash(filter_hash);
			free_ftrace_hash(notrace_hash);
		} else {
			free_ftrace_hash(save_filter_hash);
			free_ftrace_hash(save_notrace_hash);
			subops->flags |= FTRACE_OPS_FL_ENABLED;
		}
		return ret;
	}

	/*
	 * Here there's already something attached. Here are the rules:
	 *   o If either filter_hash is empty then the final stays empty
	 *      o Otherwise, the final is a superset of both hashes
	 *   o If either notrace_hash is empty then the final stays empty
	 *      o Otherwise, the final is an intersection between the hashes
	 */
	if (ops->func_hash->filter_hash == EMPTY_HASH ||
	    subops->func_hash->filter_hash == EMPTY_HASH) {
		filter_hash = EMPTY_HASH;
	} else {
		size_bits = max(ops->func_hash->filter_hash->size_bits,
				subops->func_hash->filter_hash->size_bits);
		filter_hash = alloc_and_copy_ftrace_hash(size_bits, ops->func_hash->filter_hash);
		if (!filter_hash)
			return -ENOMEM;
		ret = append_hash(&filter_hash, subops->func_hash->filter_hash);
		if (ret < 0) {
			free_ftrace_hash(filter_hash);
			return ret;
		}
	}

	if (ops->func_hash->notrace_hash == EMPTY_HASH ||
	    subops->func_hash->notrace_hash == EMPTY_HASH) {
		notrace_hash = EMPTY_HASH;
	} else {
		size_bits = max(ops->func_hash->filter_hash->size_bits,
				subops->func_hash->filter_hash->size_bits);
		notrace_hash = alloc_ftrace_hash(size_bits);
		if (!notrace_hash) {
			free_ftrace_hash(filter_hash);
			return -ENOMEM;
		}

		ret = intersect_hash(&notrace_hash, ops->func_hash->filter_hash,
				     subops->func_hash->filter_hash);
		if (ret < 0) {
			free_ftrace_hash(filter_hash);
			free_ftrace_hash(notrace_hash);
			return ret;
		}
	}

	list_add(&subops->list, &ops->subop_list);

	ret = ftrace_update_ops(ops, filter_hash, notrace_hash);
	free_ftrace_hash(filter_hash);
	free_ftrace_hash(notrace_hash);
	if (ret < 0)
		list_del(&subops->list);
	return ret;
}

/**
 * ftrace_shutdown_subops - Remove a subops from a manager ops
 * @ops: A manager ops to remove @subops from
 * @subops: The subops to remove from @ops
 * @command: Any extra command flags to add to modifying the text
 *
 * Removes the functions being traced by the @subops from @ops. Note, it
 * will not affect functions that are being traced by other subops that
 * still exist in @ops.
 *
 * If the last subops is removed from @ops, then @ops is shutdown normally.
 */
int ftrace_shutdown_subops(struct ftrace_ops *ops, struct ftrace_ops *subops, int command)
{
	struct ftrace_hash *filter_hash;
	struct ftrace_hash *notrace_hash;
	int ret;

	if (unlikely(ftrace_disabled))
		return -ENODEV;

	list_del(&subops->list);

	if (list_empty(&ops->subop_list)) {
		/* Last one, just disable the current ops */

		ret = ftrace_shutdown(ops, command);
		if (ret < 0) {
			list_add(&subops->list, &ops->subop_list);
			return ret;
		}

		subops->flags &= ~FTRACE_OPS_FL_ENABLED;

		free_ftrace_hash(ops->func_hash->filter_hash);
		free_ftrace_hash(ops->func_hash->notrace_hash);
		ops->func_hash->filter_hash = EMPTY_HASH;
		ops->func_hash->notrace_hash = EMPTY_HASH;

		return 0;
	}

	/* Rebuild the hashes without subops */
	filter_hash = append_hashes(ops);
	notrace_hash = intersect_hashes(ops);
	if (!filter_hash || !notrace_hash) {
		free_ftrace_hash(filter_hash);
		free_ftrace_hash(notrace_hash);
		list_add(&subops->list, &ops->subop_list);
		return -ENOMEM;
	}

	ret = ftrace_update_ops(ops, filter_hash, notrace_hash);
	if (ret < 0)
		list_add(&subops->list, &ops->subop_list);
	free_ftrace_hash(filter_hash);
	free_ftrace_hash(notrace_hash);
	return ret;
}
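
For a sense of how these helpers would be consumed (hypothetical caller,
not from the posted code), a graph registration could hand its per-instance
ops to a manager graph_ops like:

	/* somewhere in register_ftrace_graph(), hypothetically: */
	ret = ftrace_startup_subops(&graph_ops, &gops->ops, command);

	/* and the matching teardown in unregister_ftrace_graph(): */
	ret = ftrace_shutdown_subops(&graph_ops, &gops->ops, command);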


> 
> > This removes the need to touch the architecture code. It can also be
> > used by fprobes to handle the attachments to functions for several
> > different sets of callbacks.
> > 
> > I'll send out patches soon.  
> 
> OK, I'll wait for that.

I'm just cleaning it up. I'll post it tomorrow (your today).

-- Steve
Steven Rostedt June 1, 2024, 7:19 p.m. UTC | #6
On Fri, 31 May 2024 18:49:10 -0400
Steven Rostedt <rostedt@goodmis.org> wrote:

> I'm just cleaning it up. I'll post it tomorrow (your today).

It's going to take a little longer. I found that the rewrite broke
updating the filters while tracing is enabled. Working on fixing that
now.

-- Steve
Masami Hiramatsu (Google) June 2, 2024, 2:40 a.m. UTC | #7
On Fri, 31 May 2024 18:49:10 -0400
Steven Rostedt <rostedt@goodmis.org> wrote:

> On Fri, 31 May 2024 23:50:23 +0900
> Masami Hiramatsu (Google) <mhiramat@kernel.org> wrote:
> 
> > So is it similar to the fprobe/kprobe, use shared signle ftrace_ops,
> > but keep each fgraph has own hash table?
> 
> Sort of.
> 
> I created helper functions in ftrace that lets you have a "manager
> ftrace_ops" that will be used to assign to ftrace (with the function
> that will demultiplex), and then you can have "subops" that can be
> assigned that is per user. Here's a glimpse of the code:
> 
> /**
>  * ftrace_startup_subops - enable tracing for subops of an ops
>  * @ops: Manager ops (used to pick all the functions of its subops)
>  * @subops: A new ops to add to @ops
>  * @command: Extra commands to use to enable tracing
>  *
>  * The @ops is a manager @ops that has the filter that includes all the functions
>  * that its list of subops are tracing. Adding a new @subops will add the
>  * functions of @subops to @ops.
>  */
> int ftrace_startup_subops(struct ftrace_ops *ops, struct ftrace_ops *subops, int command)
> {
> 	struct ftrace_hash *filter_hash;
> 	struct ftrace_hash *notrace_hash;
> 	struct ftrace_hash *save_filter_hash;
> 	struct ftrace_hash *save_notrace_hash;
> 	int size_bits;
> 	int ret;
> 
> 	if (unlikely(ftrace_disabled))
> 		return -ENODEV;
> 
> 	ftrace_ops_init(ops);
> 	ftrace_ops_init(subops);
> 
> 	/* Make everything canonical (Just in case!) */
> 	if (!ops->func_hash->filter_hash)
> 		ops->func_hash->filter_hash = EMPTY_HASH;
> 	if (!ops->func_hash->notrace_hash)
> 		ops->func_hash->notrace_hash = EMPTY_HASH;
> 	if (!subops->func_hash->filter_hash)
> 		subops->func_hash->filter_hash = EMPTY_HASH;
> 	if (!subops->func_hash->notrace_hash)
> 		subops->func_hash->notrace_hash = EMPTY_HASH;
> 
> 	/* For the first subops to ops just enable it normally */
> 	if (list_empty(&ops->subop_list)) {

Could the above ftrace_ops_init() always clear up this list?

> 		/* Just use the subops hashes */
> 		filter_hash = copy_hash(subops->func_hash->filter_hash);
> 		notrace_hash = copy_hash(subops->func_hash->notrace_hash);
> 		if (!filter_hash || !notrace_hash) {
> 			free_ftrace_hash(filter_hash);
> 			free_ftrace_hash(notrace_hash);
> 			return -ENOMEM;
> 		}
> 
> 		save_filter_hash = ops->func_hash->filter_hash;
> 		save_notrace_hash = ops->func_hash->notrace_hash;
> 
> 		ops->func_hash->filter_hash = filter_hash;
> 		ops->func_hash->notrace_hash = notrace_hash;
> 		list_add(&subops->list, &ops->subop_list);
> 		ret = ftrace_startup(ops, command);
> 		if (ret < 0) {
> 			list_del(&subops->list);
> 			ops->func_hash->filter_hash = save_filter_hash;
> 			ops->func_hash->notrace_hash = save_notrace_hash;
> 			free_ftrace_hash(filter_hash);
> 			free_ftrace_hash(notrace_hash);
> 		} else {
> 			free_ftrace_hash(save_filter_hash);
> 			free_ftrace_hash(save_notrace_hash);
> 			subops->flags |= FTRACE_OPS_FL_ENABLED;
> 		}
> 		return ret;
> 	}
> 
> 	/*
> 	 * Here there's already something attached. Here are the rules:
> 	 *   o If either filter_hash is empty then the final stays empty
> 	 *      o Otherwise, the final is a superset of both hashes
> 	 *   o If either notrace_hash is empty then the final stays empty
> 	 *      o Otherwise, the final is an intersection between the hashes

Yeah, filter_hash |= subops_filter_hash and notrace_hash &= subops_notrace_hash.
The complicated point is that the filter's EMPTY_HASH means FULLSET_HASH. :)
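
A tiny worked example of those rules (sets purely illustrative):

	/*
	 * subops A: filter {foo, bar},  notrace {x, y}
	 * subops B: filter {bar, baz},  notrace {y, z}
	 * manager : filter  {foo, bar, baz}   (union)
	 *           notrace {y}               (intersection)
	 * If either filter_hash is EMPTY ("trace everything"), the union
	 * must also stay EMPTY, i.e. everything -- the complication above.
	 */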

> 	 */
> 	if (ops->func_hash->filter_hash == EMPTY_HASH ||
> 	    subops->func_hash->filter_hash == EMPTY_HASH) {
> 		filter_hash = EMPTY_HASH;
> 	} else {
> 		size_bits = max(ops->func_hash->filter_hash->size_bits,
> 				subops->func_hash->filter_hash->size_bits);

Don't we need to expand the size_bits? In the worst case the two hashes
do not share any entries, so it should be expanded.

> 		filter_hash = alloc_and_copy_ftrace_hash(size_bits, ops->func_hash->filter_hash);
> 		if (!filter_hash)
> 			return -ENOMEM;
> 		ret = append_hash(&filter_hash, subops->func_hash->filter_hash);
> 		if (ret < 0) {
> 			free_ftrace_hash(filter_hash);
> 			return ret;
> 		}
> 	}
> 
> 	if (ops->func_hash->notrace_hash == EMPTY_HASH ||
> 	    subops->func_hash->notrace_hash == EMPTY_HASH) {
> 		notrace_hash = EMPTY_HASH;
> 	} else {
> 		size_bits = max(ops->func_hash->filter_hash->size_bits,
> 				subops->func_hash->filter_hash->size_bits);
> 		notrace_hash = alloc_ftrace_hash(size_bits);
> 		if (!notrace_hash) {
> 			free_ftrace_hash(filter_hash);
> 			return -ENOMEM;
> 		}
> 
> 		ret = intersect_hash(&notrace_hash, ops->func_hash->filter_hash,
> 				     subops->func_hash->filter_hash);
> 		if (ret < 0) {
> 			free_ftrace_hash(filter_hash);
> 			free_ftrace_hash(notrace_hash);
> 			return ret;
> 		}
> 	}
> 
> 	list_add(&subops->list, &ops->subop_list);
> 
> 	ret = ftrace_update_ops(ops, filter_hash, notrace_hash);
> 	free_ftrace_hash(filter_hash);
> 	free_ftrace_hash(notrace_hash);
> 	if (ret < 0)
> 		list_del(&subops->list);
> 	return ret;
> }
> 
> /**
>  * ftrace_shutdown_subops - Remove a subops from a manager ops
>  * @ops: A manager ops to remove @subops from
>  * @subops: The subops to remove from @ops
>  * @command: Any extra command flags to add to modifying the text
>  *
>  * Removes the functions being traced by the @subops from @ops. Note, it
>  * will not affect functions that are being traced by other subops that
>  * still exist in @ops.
>  *
>  * If the last subops is removed from @ops, then @ops is shutdown normally.
>  */
> int ftrace_shutdown_subops(struct ftrace_ops *ops, struct ftrace_ops *subops, int command)
> {
> 	struct ftrace_hash *filter_hash;
> 	struct ftrace_hash *notrace_hash;
> 	int ret;
> 
> 	if (unlikely(ftrace_disabled))
> 		return -ENODEV;
> 
> 	list_del(&subops->list);
> 
> 	if (list_empty(&ops->subop_list)) {
> 		/* Last one, just disable the current ops */
> 
> 		ret = ftrace_shutdown(ops, command);
> 		if (ret < 0) {
> 			list_add(&subops->list, &ops->subop_list);
> 			return ret;
> 		}
> 
> 		subops->flags &= ~FTRACE_OPS_FL_ENABLED;
> 
> 		free_ftrace_hash(ops->func_hash->filter_hash);
> 		free_ftrace_hash(ops->func_hash->notrace_hash);
> 		ops->func_hash->filter_hash = EMPTY_HASH;
> 		ops->func_hash->notrace_hash = EMPTY_HASH;
> 
> 		return 0;
> 	}
> 
> 	/* Rebuild the hashes without subops */
> 	filter_hash = append_hashes(ops);
> 	notrace_hash = intersect_hashes(ops);
> 	if (!filter_hash || !notrace_hash) {
> 		free_ftrace_hash(filter_hash);
> 		free_ftrace_hash(notrace_hash);
> 		list_add(&subops->list, &ops->subop_list);
> 		return -ENOMEM;
> 	}
> 
> 	ret = ftrace_update_ops(ops, filter_hash, notrace_hash);
> 	if (ret < 0)
> 		list_add(&subops->list, &ops->subop_list);
> 	free_ftrace_hash(filter_hash);
> 	free_ftrace_hash(notrace_hash);
> 	return ret;
> }

OK, so if list_is_singular(&ops->subop_list), ftrace_graph_enter_ops() is
called, and if not, ftrace_graph_enter() is called, right?

Thank you,

> 
> 
> > 
> > > This removes the need to touch the architecture code. It can also be
> > > used by fprobes to handle the attachments to functions for several
> > > different sets of callbacks.
> > > 
> > > I'll send out patches soon.  
> > 
> > OK, I'll wait for that.
> 
> I'm just cleaning it up. I'll post it tomorrow (your today).
> 
> -- Steve

Patch

diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c
index a650f5e11fc5..b96740829798 100644
--- a/arch/arm64/kernel/ftrace.c
+++ b/arch/arm64/kernel/ftrace.c
@@ -481,7 +481,26 @@  void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent,
 void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
 		       struct ftrace_ops *op, struct ftrace_regs *fregs)
 {
-	prepare_ftrace_return(ip, &fregs->lr, fregs->fp);
+	struct fgraph_ops *gops = container_of(op, struct fgraph_ops, ops);
+	unsigned long frame_pointer = fregs->fp;
+	unsigned long *parent = &fregs->lr;
+	int bit;
+
+	if (unlikely(ftrace_graph_is_dead()))
+		return;
+
+	if (unlikely(atomic_read(&current->tracing_graph_pause)))
+		return;
+
+	bit = ftrace_test_recursion_trylock(ip, *parent);
+	if (bit < 0)
+		return;
+
+	if (!function_graph_enter_ops(*parent, ip, frame_pointer,
+				      (void *)frame_pointer, gops))
+		*parent = (unsigned long)&return_to_handler;
+
+	ftrace_test_recursion_unlock(bit);
 }
 #else
 /*
diff --git a/arch/loongarch/kernel/ftrace_dyn.c b/arch/loongarch/kernel/ftrace_dyn.c
index bff058317062..77acb69ad153 100644
--- a/arch/loongarch/kernel/ftrace_dyn.c
+++ b/arch/loongarch/kernel/ftrace_dyn.c
@@ -241,10 +241,21 @@  void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent)
 void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
 		       struct ftrace_ops *op, struct ftrace_regs *fregs)
 {
+	struct fgraph_ops *gops = container_of(op, struct fgraph_ops, ops);
+	unsigned long return_hooker = (unsigned long)&return_to_handler;
 	struct pt_regs *regs = &fregs->regs;
-	unsigned long *parent = (unsigned long *)&regs->regs[1];
+	unsigned long *parent;
+	unsigned long old;
+
+	parent = (unsigned long *)&regs->regs[1];
 
-	prepare_ftrace_return(ip, (unsigned long *)parent);
+	if (unlikely(atomic_read(&current->tracing_graph_pause)))
+		return;
+
+	old = *parent;
+
+	if (!function_graph_enter_ops(old, ip, 0, parent, gops))
+		*parent = return_hooker;
 }
 #else
 static int ftrace_modify_graph_caller(bool enable)
diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c
index d8d6b4fd9a14..4a9294821c0d 100644
--- a/arch/powerpc/kernel/trace/ftrace.c
+++ b/arch/powerpc/kernel/trace/ftrace.c
@@ -421,6 +421,7 @@  int __init ftrace_dyn_arch_init(void)
 void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
 		       struct ftrace_ops *op, struct ftrace_regs *fregs)
 {
+	struct fgraph_ops *gops = container_of(op, struct fgraph_ops, ops);
 	unsigned long sp = fregs->regs.gpr[1];
 	int bit;
 
@@ -434,7 +435,7 @@  void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
 	if (bit < 0)
 		goto out;
 
-	if (!function_graph_enter(parent_ip, ip, 0, (unsigned long *)sp))
+	if (!function_graph_enter_ops(parent_ip, ip, 0, (unsigned long *)sp, gops))
 		parent_ip = ppc_function_entry(return_to_handler);
 
 	ftrace_test_recursion_unlock(bit);
diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c
index 4f4987a6d83d..7bcb5f321523 100644
--- a/arch/riscv/kernel/ftrace.c
+++ b/arch/riscv/kernel/ftrace.c
@@ -218,10 +218,23 @@  void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
 void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
 		       struct ftrace_ops *op, struct ftrace_regs *fregs)
 {
+	struct fgraph_ops *gops = container_of(op, struct fgraph_ops, ops);
+	unsigned long return_hooker = (unsigned long)&return_to_handler;
 	struct pt_regs *regs = arch_ftrace_get_regs(fregs);
 	unsigned long *parent = (unsigned long *)&regs->ra;
+	unsigned long old;
+
+	if (unlikely(atomic_read(&current->tracing_graph_pause)))
+		return;
+
+	/*
+	 * We don't suffer access faults, so no extra fault-recovery assembly
+	 * is needed here.
+	 */
+	old = *parent;
 
-	prepare_ftrace_return(parent, ip, frame_pointer(regs));
+	if (!function_graph_enter_ops(old, ip, frame_pointer(regs), parent, gops))
+		*parent = return_hooker;
 }
 #else /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */
 extern void ftrace_graph_call(void);
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 8da0e66ca22d..998558cb8f15 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -648,9 +648,24 @@  void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
 		       struct ftrace_ops *op, struct ftrace_regs *fregs)
 {
 	struct pt_regs *regs = &fregs->regs;
-	unsigned long *stack = (unsigned long *)kernel_stack_pointer(regs);
+	unsigned long *parent = (unsigned long *)kernel_stack_pointer(regs);
+	struct fgraph_ops *gops = container_of(op, struct fgraph_ops, ops);
+	int bit;
+
+	if (unlikely(ftrace_graph_is_dead()))
+		return;
+
+	if (unlikely(atomic_read(&current->tracing_graph_pause)))
+		return;
 
-	prepare_ftrace_return(ip, (unsigned long *)stack, 0);
+	bit = ftrace_test_recursion_trylock(ip, *parent);
+	if (bit < 0)
+		return;
+
+	if (!function_graph_enter_ops(*parent, ip, 0, parent, gops))
+		*parent = (unsigned long)&return_to_handler;
+
+	ftrace_test_recursion_unlock(bit);
 }
 #endif
 
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 942a1f767280..e5b41683ffb9 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -1041,6 +1041,7 @@  extern int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace, struct fgraph
 struct fgraph_ops {
 	trace_func_graph_ent_t		entryfunc;
 	trace_func_graph_ret_t		retfunc;
+	struct ftrace_ops		ops; /* for the hash lists */
 	void				*private;
 	int				idx;
 };
@@ -1076,6 +1077,11 @@  extern int
 function_graph_enter(unsigned long ret, unsigned long func,
 		     unsigned long frame_pointer, unsigned long *retp);
 
+extern int
+function_graph_enter_ops(unsigned long ret, unsigned long func,
+			 unsigned long frame_pointer, unsigned long *retp,
+			 struct fgraph_ops *gops);
+
 struct ftrace_ret_stack *
 ftrace_graph_get_ret_stack(struct task_struct *task, int skip);
 
diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c
index 54ed2ed2036b..67fa7fbf6aac 100644
--- a/kernel/trace/fgraph.c
+++ b/kernel/trace/fgraph.c
@@ -18,13 +18,6 @@ 
 #include "ftrace_internal.h"
 #include "trace.h"
 
-#ifdef CONFIG_DYNAMIC_FTRACE
-#define ASSIGN_OPS_HASH(opsname, val) \
-	.func_hash		= val, \
-	.local_hash.regex_lock	= __MUTEX_INITIALIZER(opsname.local_hash.regex_lock),
-#else
-#define ASSIGN_OPS_HASH(opsname, val)
-#endif
 
 /*
  * FGRAPH_FRAME_SIZE:	Size in bytes of the meta data on the shadow stack
@@ -155,6 +148,13 @@  get_bitmap_bits(struct task_struct *t, int offset)
 	return (t->ret_stack[offset] >> FGRAPH_INDEX_SHIFT) & FGRAPH_INDEX_MASK;
 }
 
+/* For BITMAP type: set the bits in the bitmap bitmask at @offset on ret_stack */
+static inline void
+set_bitmap_bits(struct task_struct *t, int offset, unsigned long bitmap)
+{
+	t->ret_stack[offset] |= (bitmap << FGRAPH_INDEX_SHIFT);
+}
+
 /* Write the bitmap to the ret_stack at @offset (does index, offset and bitmask) */
 static inline void
 set_bitmap(struct task_struct *t, int offset, unsigned long bitmap)
@@ -381,7 +381,8 @@  int function_graph_enter(unsigned long ret, unsigned long func,
 		if (gops == &fgraph_stub)
 			continue;
 
-		if (gops->entryfunc(&trace, gops))
+		if (ftrace_ops_test(&gops->ops, func, NULL) &&
+		    gops->entryfunc(&trace, gops))
 			bitmap |= BIT(i);
 	}
 
@@ -402,6 +403,46 @@  int function_graph_enter(unsigned long ret, unsigned long func,
 	return -EBUSY;
 }
 
+/* This is called from ftrace_graph_func() via ftrace */
+int function_graph_enter_ops(unsigned long ret, unsigned long func,
+			     unsigned long frame_pointer, unsigned long *retp,
+			     struct fgraph_ops *gops)
+{
+	struct ftrace_graph_ent trace;
+	int offset;
+	int type;
+
+	/* Check whether the fgraph_ops is unregistered. */
+	if (unlikely(fgraph_array[gops->idx] == &fgraph_stub))
+		return -ENODEV;
+
+	/* Use start for the distance to ret_stack (skipping over reserve) */
+	offset = ftrace_push_return_trace(ret, func, frame_pointer, retp, gops->idx);
+	if (offset < 0)
+		return offset;
+	type = get_fgraph_type(current, offset);
+
+	/* This is the first ret_stack for this fentry */
+	if (type == FGRAPH_TYPE_RESERVED)
+		++current->curr_ret_depth;
+
+	trace.func = func;
+	trace.depth = current->curr_ret_depth;
+	if (gops->entryfunc(&trace, gops)) {
+		if (type == FGRAPH_TYPE_RESERVED)
+			set_bitmap(current, offset, BIT(gops->idx));
+		else
+			set_bitmap_bits(current, offset, BIT(gops->idx));
+		return 0;
+	}
+
+	if (type == FGRAPH_TYPE_RESERVED) {
+		current->curr_ret_stack -= FGRAPH_FRAME_OFFSET + 1;
+		current->curr_ret_depth--;
+	}
+	return -EBUSY;
+}
+
 /* Retrieve a function return address to the trace stack on thread info.*/
 static struct ftrace_ret_stack *
 ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret,
@@ -662,17 +703,25 @@  unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx,
 }
 #endif /* HAVE_FUNCTION_GRAPH_RET_ADDR_PTR */
 
-static struct ftrace_ops graph_ops = {
-	.func			= ftrace_graph_func,
-	.flags			= FTRACE_OPS_FL_INITIALIZED |
-				   FTRACE_OPS_FL_PID |
-				   FTRACE_OPS_GRAPH_STUB,
+void fgraph_init_ops(struct ftrace_ops *dst_ops,
+		     struct ftrace_ops *src_ops)
+{
+	dst_ops->func = ftrace_graph_func;
+	dst_ops->flags = FTRACE_OPS_FL_PID | FTRACE_OPS_GRAPH_STUB;
+
 #ifdef FTRACE_GRAPH_TRAMP_ADDR
-	.trampoline		= FTRACE_GRAPH_TRAMP_ADDR,
+	dst_ops->trampoline = FTRACE_GRAPH_TRAMP_ADDR;
 	/* trampoline_size is only needed for dynamically allocated tramps */
 #endif
-	ASSIGN_OPS_HASH(graph_ops, &global_ops.local_hash)
-};
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+	if (src_ops) {
+		dst_ops->func_hash = &src_ops->local_hash;
+		mutex_init(&dst_ops->local_hash.regex_lock);
+		dst_ops->flags |= FTRACE_OPS_FL_INITIALIZED;
+	}
+#endif
+}
 
 void ftrace_graph_sleep_time_control(bool enable)
 {
@@ -876,11 +925,20 @@  static int start_graph_tracing(void)
 
 int register_ftrace_graph(struct fgraph_ops *gops)
 {
+	int command = 0;
 	int ret = 0;
 	int i;
 
 	mutex_lock(&ftrace_lock);
 
+	if (!gops->ops.func) {
+		gops->ops.flags |= FTRACE_OPS_GRAPH_STUB;
+		gops->ops.func = ftrace_graph_func;
+#ifdef FTRACE_GRAPH_TRAMP_ADDR
+		gops->ops.trampoline = FTRACE_GRAPH_TRAMP_ADDR;
+#endif
+	}
+
 	if (!fgraph_array[0]) {
 		/* The array must always have real data on it */
 		for (i = 0; i < FGRAPH_ARRAY_SIZE; i++)
@@ -893,7 +951,7 @@  int register_ftrace_graph(struct fgraph_ops *gops)
 			break;
 	}
 	if (i >= FGRAPH_ARRAY_SIZE) {
-		ret = -EBUSY;
+		ret = -ENOSPC;
 		goto out;
 	}
 
@@ -907,18 +965,22 @@  int register_ftrace_graph(struct fgraph_ops *gops)
 	if (ftrace_graph_active == 1) {
 		register_pm_notifier(&ftrace_suspend_notifier);
 		ret = start_graph_tracing();
-		if (ret) {
-			ftrace_graph_active--;
-			goto out;
-		}
+		if (ret)
+			goto error;
 		/*
 		 * Some archs just test to see if these are not
 		 * the default function
 		 */
 		ftrace_graph_return = return_run;
 		ftrace_graph_entry = entry_run;
+		command = FTRACE_START_FUNC_RET;
+	}
 
-		ret = ftrace_startup(&graph_ops, FTRACE_START_FUNC_RET);
+	ret = ftrace_startup(&gops->ops, command);
+error:
+	if (ret) {
+		fgraph_array[i] = &fgraph_stub;
+		ftrace_graph_active--;
 	}
 out:
 	mutex_unlock(&ftrace_lock);
@@ -927,6 +989,7 @@  int register_ftrace_graph(struct fgraph_ops *gops)
 
 void unregister_ftrace_graph(struct fgraph_ops *gops)
 {
+	int command = 0;
 	int i;
 
 	mutex_lock(&ftrace_lock);
@@ -934,25 +997,29 @@  void unregister_ftrace_graph(struct fgraph_ops *gops)
 	if (unlikely(!ftrace_graph_active))
 		goto out;
 
-	for (i = 0; i < fgraph_array_cnt; i++)
-		if (gops == fgraph_array[i])
-			break;
-	if (i >= fgraph_array_cnt)
+	if (unlikely(gops->idx < 0 || gops->idx >= fgraph_array_cnt))
 		goto out;
 
-	fgraph_array[i] = &fgraph_stub;
-	if (i + 1 == fgraph_array_cnt) {
-		for (; i >= 0; i--)
-			if (fgraph_array[i] != &fgraph_stub)
-				break;
+	WARN_ON_ONCE(fgraph_array[gops->idx] != gops);
+
+	fgraph_array[gops->idx] = &fgraph_stub;
+	if (gops->idx + 1 == fgraph_array_cnt) {
+		i = gops->idx;
+		while (i >= 0 && fgraph_array[i] == &fgraph_stub)
+			i--;
 		fgraph_array_cnt = i + 1;
 	}
 
 	ftrace_graph_active--;
+
+	if (!ftrace_graph_active)
+		command = FTRACE_STOP_FUNC_RET;
+
+	ftrace_shutdown(&gops->ops, command);
+
 	if (!ftrace_graph_active) {
 		ftrace_graph_return = ftrace_stub_graph;
 		ftrace_graph_entry = ftrace_graph_entry_stub;
-		ftrace_shutdown(&graph_ops, FTRACE_STOP_FUNC_RET);
 		unregister_pm_notifier(&ftrace_suspend_notifier);
 		unregister_trace_sched_switch(ftrace_graph_probe_sched_switch, NULL);
 	}
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index b85f00b0ffe7..fa6a686726db 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -3016,6 +3016,8 @@  int ftrace_startup(struct ftrace_ops *ops, int command)
 	if (unlikely(ftrace_disabled))
 		return -ENODEV;
 
+	ftrace_ops_init(ops);
+
 	ret = __register_ftrace_function(ops);
 	if (ret)
 		return ret;
@@ -7327,7 +7329,7 @@  __init void ftrace_init_global_array_ops(struct trace_array *tr)
 	tr->ops = &global_ops;
 	tr->ops->private = tr;
 	ftrace_init_trace_array(tr);
-	init_array_fgraph_ops(tr);
+	init_array_fgraph_ops(tr, tr->ops);
 }
 
 void ftrace_init_array_ops(struct trace_array *tr, ftrace_func_t func)
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index a5070f9b977b..ced61eacf40f 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -894,8 +894,8 @@  extern int __trace_graph_entry(struct trace_array *tr,
 extern void __trace_graph_return(struct trace_array *tr,
 				 struct ftrace_graph_ret *trace,
 				 unsigned int trace_ctx);
-extern void init_array_fgraph_ops(struct trace_array *tr);
-extern int allocate_fgraph_ops(struct trace_array *tr);
+extern void init_array_fgraph_ops(struct trace_array *tr, struct ftrace_ops *ops);
+extern int allocate_fgraph_ops(struct trace_array *tr, struct ftrace_ops *ops);
 extern void free_fgraph_ops(struct trace_array *tr);
 
 #ifdef CONFIG_DYNAMIC_FTRACE
@@ -978,6 +978,7 @@  static inline int ftrace_graph_notrace_addr(unsigned long addr)
 	preempt_enable_notrace();
 	return ret;
 }
+
 #else
 static inline int ftrace_graph_addr(struct ftrace_graph_ent *trace)
 {
@@ -1003,18 +1004,19 @@  static inline bool ftrace_graph_ignore_func(struct ftrace_graph_ent *trace)
 		(fgraph_max_depth && trace->depth >= fgraph_max_depth);
 }
 
+void fgraph_init_ops(struct ftrace_ops *dst_ops,
+		     struct ftrace_ops *src_ops);
+
 #else /* CONFIG_FUNCTION_GRAPH_TRACER */
 static inline enum print_line_t
 print_graph_function_flags(struct trace_iterator *iter, u32 flags)
 {
 	return TRACE_TYPE_UNHANDLED;
 }
-static inline void init_array_fgraph_ops(struct trace_array *tr) { }
-static inline int allocate_fgraph_ops(struct trace_array *tr)
-{
-	return 0;
-}
 static inline void free_fgraph_ops(struct trace_array *tr) { }
+/* ftrace_ops may not be defined */
+#define init_array_fgraph_ops(tr, ops) do { } while (0)
+#define allocate_fgraph_ops(tr, ops) ({ 0; })
 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
 
 extern struct list_head ftrace_pids;
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index 8e8da0d0ee52..13bf2415245d 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -91,7 +91,7 @@  int ftrace_create_function_files(struct trace_array *tr,
 	if (!tr->ops)
 		return -EINVAL;
 
-	ret = allocate_fgraph_ops(tr);
+	ret = allocate_fgraph_ops(tr, tr->ops);
 	if (ret) {
 		kfree(tr->ops);
 		return ret;
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 9ccc904a7703..7f30652f0e97 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -288,7 +288,7 @@  static struct fgraph_ops funcgraph_ops = {
 	.retfunc = &trace_graph_return,
 };
 
-int allocate_fgraph_ops(struct trace_array *tr)
+int allocate_fgraph_ops(struct trace_array *tr, struct ftrace_ops *ops)
 {
 	struct fgraph_ops *gops;
 
@@ -301,6 +301,9 @@  int allocate_fgraph_ops(struct trace_array *tr)
 
 	tr->gops = gops;
 	gops->private = tr;
+
+	fgraph_init_ops(&gops->ops, ops);
+
 	return 0;
 }
 
@@ -309,10 +312,11 @@  void free_fgraph_ops(struct trace_array *tr)
 	kfree(tr->gops);
 }
 
-__init void init_array_fgraph_ops(struct trace_array *tr)
+__init void init_array_fgraph_ops(struct trace_array *tr, struct ftrace_ops *ops)
 {
 	tr->gops = &funcgraph_ops;
 	funcgraph_ops.private = tr;
+	fgraph_init_ops(&tr->gops->ops, ops);
 }
 
 static int graph_trace_init(struct trace_array *tr)