[v2,24/27] function_graph: Use static_call and branch to optimize entry function

Message ID: 20240602033834.997761817@goodmis.org (mailing list archive)
State: Not Applicable
Delegated to: BPF
Series: function_graph: Allow multiple users for function graph tracing

Checks

Context                  Check     Description
netdev/tree_selection    success   Not a local patch

Commit Message

Steven Rostedt June 2, 2024, 3:38 a.m. UTC
From: "Steven Rostedt (Google)" <rostedt@goodmis.org>

In most cases, function graph tracing is used by a single user. Instead of
looping through all the function graph callbacks in that case, call the
function entry callback directly.

Add a static_key that will be used to make the function graph logic
either do the loop (when more than one callback is registered) or call
the callback directly (when there is only one registered callback).

Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 kernel/trace/fgraph.c | 77 ++++++++++++++++++++++++++++++++++++-------
 1 file changed, 66 insertions(+), 11 deletions(-)
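
For illustration, here is a minimal standalone sketch of the static_key +
static_call dispatch pattern this patch builds on (not part of the patch;
the handler names are made up, but DEFINE_STATIC_CALL(),
DEFINE_STATIC_KEY_TRUE(), static_branch_likely(), static_call() and
static_call_update() are the real kernel APIs):

#include <linux/jump_label.h>
#include <linux/static_call.h>

static int one_handler(int arg)		/* hypothetical fast-path callback */
{
	return arg;
}

static int loop_handlers(int arg)	/* hypothetical slow path */
{
	/* ... iterate over every registered callback ... */
	return arg;
}

/* Call site patched at runtime; defaults to the single handler. */
DEFINE_STATIC_CALL(example_func, one_handler);
/* Key starts true: take the direct path until a second user registers. */
DEFINE_STATIC_KEY_TRUE(example_do_direct);

static int dispatch(int arg)
{
	if (static_branch_likely(&example_do_direct))
		return static_call(example_func)(arg);	/* direct, no loop */
	return loop_handlers(arg);			/* generic loop */
}

/* On a second registration:  static_branch_disable(&example_do_direct);
 * Back down to one user:     static_call_update(example_func, one_handler);
 *                            static_branch_enable(&example_do_direct);
 */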

Comments

Masami Hiramatsu (Google) June 3, 2024, 3:11 a.m. UTC | #1
On Sat, 01 Jun 2024 23:38:08 -0400
Steven Rostedt <rostedt@goodmis.org> wrote:

> From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
> 
> In most cases, function graph tracing is used by a single user. Instead of
> looping through all the function graph callbacks in that case, call the
> function entry callback directly.
> 
> Add a static_key that will be used to make the function graph logic
> either do the loop (when more than one callback is registered) or call
> the callback directly (when there is only one registered callback).

I understand this works, but my concern is that if we use fprobe
and function_graph at the same time, will it always loop over both gops?

I mean, if those are the subops of one ftrace_ops, the ftrace trampoline
will always call the same function_graph_enter() for both gops and loop
over the gops list.

For example, if there are 2 fgraph_ops, one with a "vfs_*" filter and
another with a "sched_*" filter, the two filters do not overlap at all.
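
For concreteness, that scenario looks roughly like this (sketch only;
my_entry/my_return and the init function are placeholders, and each gops'
filter is set through its embedded ftrace_ops):

static struct fgraph_ops gops_vfs = {
	.entryfunc = my_entry,		/* placeholder callbacks */
	.retfunc   = my_return,
};

static struct fgraph_ops gops_sched = {
	.entryfunc = my_entry,
	.retfunc   = my_return,
};

static int __init two_users_init(void)
{
	/* Non-overlapping filters on the two subops. */
	ftrace_set_filter(&gops_vfs.ops, "vfs_*", strlen("vfs_*"), 1);
	ftrace_set_filter(&gops_sched.ops, "sched_*", strlen("sched_*"), 1);

	/* Error handling omitted for brevity. */
	register_ftrace_graph(&gops_vfs);
	register_ftrace_graph(&gops_sched);

	/* A hit on vfs_read() still goes through function_graph_enter()
	 * and loops over both gops; ftrace_ops_test() rejects gops_sched
	 * there, but the loop itself is not avoided.
	 */
	return 0;
}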

Is there any way to solve this issue? I think my previous series
called function_graph_enter_ops() directly from the trampoline (if it
works correctly...)

Thank you,

Steven Rostedt June 3, 2024, 3 p.m. UTC | #2
On Mon, 3 Jun 2024 12:11:07 +0900
Masami Hiramatsu (Google) <mhiramat@kernel.org> wrote:

> > From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
> > 
> > In most cases, function graph tracing is used by a single user. Instead of
> > looping through all the function graph callbacks in that case, call the
> > function entry callback directly.
> > 
> > Add a static_key that will be used to make the function graph logic
> > either do the loop (when more than one callback is registered) or call
> > the callback directly (when there is only one registered callback).
> 
> I understand this works, but my concern is that if we use fprobe
> and function_graph at the same time, will it always loop over both gops?
> 
> I mean, if those are the subops of one ftrace_ops, the ftrace trampoline
> will always call the same function_graph_enter() for both gops and loop
> over the gops list.
> 
> For example, if there are 2 fgraph_ops, one with a "vfs_*" filter and
> another with a "sched_*" filter, the two filters do not overlap at all.
> 
> Is there any way to solve this issue? I think my previous series
> called function_graph_enter_ops() directly from the trampoline (if it
> works correctly...)

Yes, but that gets a bit complex and requires changing all the archs.
If it starts to become a problem, I'd rather add that as a feature. That is,
we can always go back to it. But for now, let's keep the complexity down.

-- Steve
Steven Rostedt June 3, 2024, 3:07 p.m. UTC | #3
On Mon, 3 Jun 2024 11:00:18 -0400
Steven Rostedt <rostedt@goodmis.org> wrote:

> Yes, but that gets a bit complex and requires changing all the archs.
> If it starts to become a problem, I'd rather add that as a feature. That is,
> we can always go back to it. But for now, let's keep the complexity down.

And if we were to go the route of calling a single fgraph_ops caller, I
would want it choreographed with ftrace, such that the direct caller calls
a dedicated fgraph function when a function has only one graph caller
attached, and the fgraph loop function when it has more than one. Just
like the ftrace code does.
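
Roughly, that choreography would look like this at each call site (pure
sketch; only_one_gops_attached() is a made-up helper, and the
function_graph_enter_ops() call only follows the shape of Masami's
earlier series):

	/* in the arch trampoline / dispatch path */
	if (only_one_gops_attached(func))
		/* hand the single gops straight to its callback, no loop */
		function_graph_enter_ops(ret, func, frame_pointer, retp, gops);
	else
		/* generic path: loop over every registered fgraph_ops */
		function_graph_enter(ret, func, frame_pointer, retp);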

If we are going to go that route, let's do it right.

-- Steve
Masami Hiramatsu (Google) June 3, 2024, 11:08 p.m. UTC | #4
On Mon, 3 Jun 2024 11:07:52 -0400
Steven Rostedt <rostedt@goodmis.org> wrote:

> On Mon, 3 Jun 2024 11:00:18 -0400
> Steven Rostedt <rostedt@goodmis.org> wrote:
> 
> > Yes, but that gets a bit complex and requires changing all the archs.
> > If it starts to become a problem, I'd rather add that as a feature. That is,
> > we can always go back to it. But for now, let's keep the complexity down.
> 
> And if we were to go the route of calling a single fgraph_ops caller, I
> would want it choreographed with ftrace, such that the direct caller calls
> a dedicated fgraph function when a function has only one graph caller
> attached, and the fgraph loop function when it has more than one. Just
> like the ftrace code does.
> 
> If we are going to go that route, let's do it right.

Yes, that is what I expect. The ftrace_ops has a callback for the loop,
each subops has its own direct callback, and the ftrace_ops has a flag
that allows assigning a subops callback directly. In that case ftrace
will assign the subops's direct callback when only a single subops is
attached.

But anyway, this optimization can be done afterwards. This feature is
especially important for fprobes on fgraph; until then, the current
implementation is enough.

Thank you,

> 
> -- Steve

Patch

diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c
index 4d566a0a741d..7c3b0261b1bb 100644
--- a/kernel/trace/fgraph.c
+++ b/kernel/trace/fgraph.c
@@ -11,6 +11,7 @@ 
 #include <linux/jump_label.h>
 #include <linux/suspend.h>
 #include <linux/ftrace.h>
+#include <linux/static_call.h>
 #include <linux/slab.h>
 
 #include <trace/events/sched.h>
@@ -511,6 +512,10 @@ static struct fgraph_ops fgraph_stub = {
 	.retfunc = ftrace_graph_ret_stub,
 };
 
+static struct fgraph_ops *fgraph_direct_gops = &fgraph_stub;
+DEFINE_STATIC_CALL(fgraph_func, ftrace_graph_entry_stub);
+DEFINE_STATIC_KEY_TRUE(fgraph_do_direct);
+
 /**
  * ftrace_graph_stop - set to permanently disable function graph tracing
  *
@@ -636,21 +641,34 @@ int function_graph_enter(unsigned long ret, unsigned long func,
 	if (offset < 0)
 		goto out;
 
-	for_each_set_bit(i, &fgraph_array_bitmask,
-			 sizeof(fgraph_array_bitmask) * BITS_PER_BYTE) {
-		struct fgraph_ops *gops = fgraph_array[i];
-		int save_curr_ret_stack;
-
-		if (gops == &fgraph_stub)
-			continue;
+#ifdef CONFIG_HAVE_STATIC_CALL
+	if (static_branch_likely(&fgraph_do_direct)) {
+		int save_curr_ret_stack = current->curr_ret_stack;
 
-		save_curr_ret_stack = current->curr_ret_stack;
-		if (ftrace_ops_test(&gops->ops, func, NULL) &&
-		    gops->entryfunc(&trace, gops))
-			bitmap |= BIT(i);
+		if (static_call(fgraph_func)(&trace, fgraph_direct_gops))
+			bitmap |= BIT(fgraph_direct_gops->idx);
 		else
 			/* Clear out any saved storage */
 			current->curr_ret_stack = save_curr_ret_stack;
+	} else
+#endif
+	{
+		for_each_set_bit(i, &fgraph_array_bitmask,
+					 sizeof(fgraph_array_bitmask) * BITS_PER_BYTE) {
+			struct fgraph_ops *gops = fgraph_array[i];
+			int save_curr_ret_stack;
+
+			if (gops == &fgraph_stub)
+				continue;
+
+			save_curr_ret_stack = current->curr_ret_stack;
+			if (ftrace_ops_test(&gops->ops, func, NULL) &&
+			    gops->entryfunc(&trace, gops))
+				bitmap |= BIT(i);
+			else
+				/* Clear out any saved storage */
+				current->curr_ret_stack = save_curr_ret_stack;
+		}
 	}
 
 	if (!bitmap)
@@ -1155,6 +1173,8 @@ void fgraph_update_pid_func(void)
 			gops = container_of(op, struct fgraph_ops, ops);
 			gops->entryfunc = ftrace_pids_enabled(op) ?
 				fgraph_pid_func : gops->saved_func;
+			if (ftrace_graph_active == 1)
+				static_call_update(fgraph_func, gops->entryfunc);
 		}
 	}
 }
@@ -1209,6 +1229,32 @@ static void init_task_vars(int idx)
 	read_unlock(&tasklist_lock);
 }
 
+static void ftrace_graph_enable_direct(bool enable_branch)
+{
+	trace_func_graph_ent_t func = NULL;
+	int i;
+
+	for_each_set_bit(i, &fgraph_array_bitmask,
+			 sizeof(fgraph_array_bitmask) * BITS_PER_BYTE) {
+		func = fgraph_array[i]->entryfunc;
+		fgraph_direct_gops = fgraph_array[i];
+	}
+	if (WARN_ON_ONCE(!func))
+		return;
+
+	static_call_update(fgraph_func, func);
+	if (enable_branch)
+		static_branch_enable(&fgraph_do_direct);
+}
+
+static void ftrace_graph_disable_direct(bool disable_branch)
+{
+	if (disable_branch)
+		static_branch_disable(&fgraph_do_direct);
+	static_call_update(fgraph_func, ftrace_graph_entry_stub);
+	fgraph_direct_gops = &fgraph_stub;
+}
+
 int register_ftrace_graph(struct fgraph_ops *gops)
 {
 	int command = 0;
@@ -1235,7 +1281,11 @@ int register_ftrace_graph(struct fgraph_ops *gops)
 
 	ftrace_graph_active++;
 
+	if (ftrace_graph_active == 2)
+		ftrace_graph_disable_direct(true);
+
 	if (ftrace_graph_active == 1) {
+		ftrace_graph_enable_direct(false);
 		register_pm_notifier(&ftrace_suspend_notifier);
 		ret = start_graph_tracing();
 		if (ret)
@@ -1292,6 +1342,11 @@ void unregister_ftrace_graph(struct fgraph_ops *gops)
 
 	ftrace_shutdown_subops(&graph_ops, &gops->ops, command);
 
+	if (ftrace_graph_active == 1)
+		ftrace_graph_enable_direct(true);
+	else if (!ftrace_graph_active)
+		ftrace_graph_disable_direct(false);
+
 	if (!ftrace_graph_active) {
 		ftrace_graph_return = ftrace_stub_graph;
 		ftrace_graph_entry = ftrace_graph_entry_stub;
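
For reference, a minimal sketch of the single-user case this patch
optimizes (callback names are made up; the entry/return signatures follow
the fgraph_ops API of this series):

static int my_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops)
{
	return 1;	/* non-zero: trace this function */
}

static void my_return(struct ftrace_graph_ret *trace, struct fgraph_ops *gops)
{
}

static struct fgraph_ops my_gops = {
	.entryfunc = my_entry,
	.retfunc   = my_return,
};

/* register_ftrace_graph(&my_gops) brings ftrace_graph_active to 1, so
 * ftrace_graph_enable_direct() points the fgraph_func static_call at
 * my_entry, and function_graph_enter() takes the static_branch fast path,
 * skipping the fgraph_array loop entirely.
 */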