diff mbox series

[RFC,v2,3/3] tracing: Freeable reserved ring buffer

Message ID 173920225773.826592.6428338529686044470.stgit@devnote2 (mailing list archive)
State New
Headers show
Series tracing: Improve persistent ring buffer | expand

Commit Message

Masami Hiramatsu (Google) Feb. 10, 2025, 3:44 p.m. UTC
From: Masami Hiramatsu (Google) <mhiramat@kernel.org>

Make the ring buffer on reserved memory freeable. This allows us
to free the trace instance on the reserved memory without changing the
cmdline and rebooting. Even if we cannot change the kernel cmdline
for security reasons, we can release the reserved memory for the ring
buffer as free (available) memory.

For example, boot the kernel with reserved memory:
"reserve_mem=20M:2M:trace trace_instance=boot_mapped^traceoff@trace"

 # free
              total        used        free      shared  buff/cache   available
Mem:        1994720       45292     1931960       14908       17468     1915920
Swap:             0           0           0
 # rmdir /sys/kernel/tracing/instances/boot_mapped/
 # free
              total        used        free      shared  buff/cache   available
Mem:        1994720       17204     1960060       14912       17456     1944012
Swap:             0           0           0

Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
---
 kernel/trace/trace.c |   17 +++++++++++++++--
 kernel/trace/trace.h |    1 +
 2 files changed, 16 insertions(+), 2 deletions(-)

Comments

Steven Rostedt Feb. 11, 2025, 12:13 a.m. UTC | #1
On Tue, 11 Feb 2025 00:44:17 +0900
"Masami Hiramatsu (Google)" <mhiramat@kernel.org> wrote:

> @@ -9857,8 +9861,10 @@ static int __remove_instance(struct trace_array *tr)
>  	int i;
>  
>  	/* Reference counter for a newly created trace array = 1. */
> -	if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
> +	if (tr->ref > 1 || (tr->current_trace && tr->trace_ref)) {
> +		pr_info("Instance is busy: ref %d trace_ref %d\n", tr->ref, tr->trace_ref);

I don't think we need the print. The function will error with -EBUSY
letting the user know that it is busy. This is a very common error, where I
found that I had a program that didn't completely die, and still had a file
descriptor open on an instance, and when I go to remove it, I get:

  # cd /sys/kernel/tracing/
  # mkdir instances/foo
  # bash 5< instances/foo/events/enable
  # rmdir instances/foo
  rmdir: failed to remove 'instances/foo': Device or resource busy
  # exit
  # rmdir instances/foo
  #

If you only have it for debugging that is fine, but we should remove it
before we accept it.

-- Steve

>  		return -EBUSY;
> +	}
>  
>  	list_del(&tr->list);
>  
> @@ -9881,6 +9887,11 @@ static int __remove_instance(struct trace_array *tr)
>  	free_trace_buffers(tr);
>  	clear_tracing_err_log(tr);
>  
> +	if (tr->range_name) {
> +		reserved_mem_release_by_name(tr->range_name);
> +		kfree(tr->range_name);
> +	}
> +
>  	for (i = 0; i < tr->nr_topts; i++) {
>  		kfree(tr->topts[i].topts);
>  	}
> @@ -10740,6 +10751,7 @@ __init static void enable_instances(void)
>  		bool traceoff = false;
>  		char *flag_delim;
>  		char *addr_delim;
> +		char *rname __free(kfree) = NULL;
>  
>  		tok = strsep(&curr_str, ",");
>  
> @@ -10796,6 +10808,7 @@ __init static void enable_instances(void)
>  				pr_warn("Failed to map boot instance %s to %s\n", name, tok);
>  				continue;
>  			}
> +			rname = kstrdup(tok, GFP_KERNEL);
>  		}
>  
>  		if (start) {
> @@ -10832,7 +10845,7 @@ __init static void enable_instances(void)
>  		 */
>  		if (start) {
>  			tr->flags |= TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT;
> -			tr->ref++;
> +			tr->range_name = no_free_ptr(rname);
>  		}
>  
>  		while ((tok = strsep(&curr_str, ","))) {
> diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
> index 47c0742fe9ec..ae8f7fac6592 100644
> --- a/kernel/trace/trace.h
> +++ b/kernel/trace/trace.h
> @@ -348,6 +348,7 @@ struct trace_array {
>  	unsigned int		mapped;
>  	unsigned long		range_addr_start;
>  	unsigned long		range_addr_size;
> +	char			*range_name;
>  	long			text_delta;
>  	int			nr_modules;
>  	long			*module_delta;
Masami Hiramatsu (Google) Feb. 11, 2025, 1:41 a.m. UTC | #2
On Mon, 10 Feb 2025 19:13:50 -0500
Steven Rostedt <rostedt@goodmis.org> wrote:

> On Tue, 11 Feb 2025 00:44:17 +0900
> "Masami Hiramatsu (Google)" <mhiramat@kernel.org> wrote:
> 
> > @@ -9857,8 +9861,10 @@ static int __remove_instance(struct trace_array *tr)
> >  	int i;
> >  
> >  	/* Reference counter for a newly created trace array = 1. */
> > -	if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
> > +	if (tr->ref > 1 || (tr->current_trace && tr->trace_ref)) {
> > +		pr_info("Instance is busy: ref %d trace_ref %d\n", tr->ref, tr->trace_ref);
> 
> I don't think we need the print. The function will error with -EBUSY
> letting the user know that it is busy. This is a very common error, where I
> found that I had a program that didn't completely die, and still had a file
> descriptor open on an instance, and when I go to remove it, I get:
> 
>   # cd /sys/kernel/tracing/
>   # mkdir instances/foo
>   # bash 5< instances/foo/events/enable
>   # rmdir instances/foo
>   rmdir: failed to remove 'instances/foo': Device or resource busy
>   # exit
>   # rmdir instances/foo
>   #
> 
> If you only have it for debugging that is fine, but we should remove it
> before we accept it.

Oops, that is my debug code. I forgot to remove that! Sorry for the confusion.

Thank you!

> 
> -- Steve
> 
> >  		return -EBUSY;
> > +	}
> >  
> >  	list_del(&tr->list);
> >  
> > @@ -9881,6 +9887,11 @@ static int __remove_instance(struct trace_array *tr)
> >  	free_trace_buffers(tr);
> >  	clear_tracing_err_log(tr);
> >  
> > +	if (tr->range_name) {
> > +		reserved_mem_release_by_name(tr->range_name);
> > +		kfree(tr->range_name);
> > +	}
> > +
> >  	for (i = 0; i < tr->nr_topts; i++) {
> >  		kfree(tr->topts[i].topts);
> >  	}
> > @@ -10740,6 +10751,7 @@ __init static void enable_instances(void)
> >  		bool traceoff = false;
> >  		char *flag_delim;
> >  		char *addr_delim;
> > +		char *rname __free(kfree) = NULL;
> >  
> >  		tok = strsep(&curr_str, ",");
> >  
> > @@ -10796,6 +10808,7 @@ __init static void enable_instances(void)
> >  				pr_warn("Failed to map boot instance %s to %s\n", name, tok);
> >  				continue;
> >  			}
> > +			rname = kstrdup(tok, GFP_KERNEL);
> >  		}
> >  
> >  		if (start) {
> > @@ -10832,7 +10845,7 @@ __init static void enable_instances(void)
> >  		 */
> >  		if (start) {
> >  			tr->flags |= TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT;
> > -			tr->ref++;
> > +			tr->range_name = no_free_ptr(rname);
> >  		}
> >  
> >  		while ((tok = strsep(&curr_str, ","))) {
> > diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
> > index 47c0742fe9ec..ae8f7fac6592 100644
> > --- a/kernel/trace/trace.h
> > +++ b/kernel/trace/trace.h
> > @@ -348,6 +348,7 @@ struct trace_array {
> >  	unsigned int		mapped;
> >  	unsigned long		range_addr_start;
> >  	unsigned long		range_addr_size;
> > +	char			*range_name;
> >  	long			text_delta;
> >  	int			nr_modules;
> >  	long			*module_delta;
>
Steven Rostedt Feb. 11, 2025, 2:57 p.m. UTC | #3
On Tue, 11 Feb 2025 10:41:22 +0900
Masami Hiramatsu (Google) <mhiramat@kernel.org> wrote:

> On Mon, 10 Feb 2025 19:13:50 -0500
> Steven Rostedt <rostedt@goodmis.org> wrote:
> 
> > On Tue, 11 Feb 2025 00:44:17 +0900
> > "Masami Hiramatsu (Google)" <mhiramat@kernel.org> wrote:
> >   
> > > @@ -9857,8 +9861,10 @@ static int __remove_instance(struct trace_array *tr)
> > >  	int i;
> > >  
> > >  	/* Reference counter for a newly created trace array = 1. */
> > > -	if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
> > > +	if (tr->ref > 1 || (tr->current_trace && tr->trace_ref)) {
> > > +		pr_info("Instance is busy: ref %d trace_ref %d\n", tr->ref, tr->trace_ref);  
> > 
> > I don't think we need the print. The function will error with -EBUSY
> > letting the user know that it is busy. This is a very common error, where I
> > found that I had a program that didn't completely die, and still had a file
> > descriptor open on an instance, and when I go to remove it, I get:
> > 
> >   # cd /sys/kernel/tracing/
> >   # mkdir instances/foo
> >   # bash 5< instances/foo/events/enable
> >   # rmdir instances/foo
> >   rmdir: failed to remove 'instances/foo': Device or resource busy
> >   # exit
> >   # rmdir instances/foo
> >   #
> > 
> > If you only have it for debugging that is fine, but we should remove it
> > before we accept it.  
> 
> Oops, that is my debug code. I forgot to remove that! Sorry for the confusion.

Heh, that's why I use "printk()" and not the "pr_*()" functions. Since I
don't usually have "printk()" in the normal code but use the "pr_*()"
functions there, it makes it easy for me to find the debug statements I put
into my code while I develop it.

-- Steve
diff mbox series

Patch

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index dde8000a5614..113ad3179ecd 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -9594,6 +9594,9 @@  static void free_trace_buffers(struct trace_array *tr)
 #ifdef CONFIG_TRACER_MAX_TRACE
 	free_trace_buffer(&tr->max_buffer);
 #endif
+
+	if (tr->range_addr_start)
+		vunmap((void *)tr->range_addr_start);
 }
 
 static void init_trace_flags_index(struct trace_array *tr)
@@ -9755,6 +9758,7 @@  trace_array_create_systems(const char *name, const char *systems,
 	free_cpumask_var(tr->pipe_cpumask);
 	free_cpumask_var(tr->tracing_cpumask);
 	kfree_const(tr->system_names);
+	kfree(tr->range_name);
 	kfree(tr->name);
 	kfree(tr);
 
@@ -9857,8 +9861,10 @@  static int __remove_instance(struct trace_array *tr)
 	int i;
 
 	/* Reference counter for a newly created trace array = 1. */
-	if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
+	if (tr->ref > 1 || (tr->current_trace && tr->trace_ref)) {
+		pr_info("Instance is busy: ref %d trace_ref %d\n", tr->ref, tr->trace_ref);
 		return -EBUSY;
+	}
 
 	list_del(&tr->list);
 
@@ -9881,6 +9887,11 @@  static int __remove_instance(struct trace_array *tr)
 	free_trace_buffers(tr);
 	clear_tracing_err_log(tr);
 
+	if (tr->range_name) {
+		reserved_mem_release_by_name(tr->range_name);
+		kfree(tr->range_name);
+	}
+
 	for (i = 0; i < tr->nr_topts; i++) {
 		kfree(tr->topts[i].topts);
 	}
@@ -10740,6 +10751,7 @@  __init static void enable_instances(void)
 		bool traceoff = false;
 		char *flag_delim;
 		char *addr_delim;
+		char *rname __free(kfree) = NULL;
 
 		tok = strsep(&curr_str, ",");
 
@@ -10796,6 +10808,7 @@  __init static void enable_instances(void)
 				pr_warn("Failed to map boot instance %s to %s\n", name, tok);
 				continue;
 			}
+			rname = kstrdup(tok, GFP_KERNEL);
 		}
 
 		if (start) {
@@ -10832,7 +10845,7 @@  __init static void enable_instances(void)
 		 */
 		if (start) {
 			tr->flags |= TRACE_ARRAY_FL_BOOT | TRACE_ARRAY_FL_LAST_BOOT;
-			tr->ref++;
+			tr->range_name = no_free_ptr(rname);
 		}
 
 		while ((tok = strsep(&curr_str, ","))) {
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 47c0742fe9ec..ae8f7fac6592 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -348,6 +348,7 @@  struct trace_array {
 	unsigned int		mapped;
 	unsigned long		range_addr_start;
 	unsigned long		range_addr_size;
+	char			*range_name;
 	long			text_delta;
 	int			nr_modules;
 	long			*module_delta;