diff mbox series

tracing: Fix blocked reader of snapshot buffer

Message ID 20231228095149.77f5b45d@gandalf.local.home (mailing list archive)
State Accepted
Commit 39a7dc23a1ed0fe81141792a09449d124c5953bd
Headers show
Series tracing: Fix blocked reader of snapshot buffer | expand

Commit Message

Steven Rostedt Dec. 28, 2023, 2:51 p.m. UTC
From: "Steven Rostedt (Google)" <rostedt@goodmis.org>

If an application blocks on the snapshot or snapshot_raw files, expecting
to be woken up when a snapshot occurs, it will not happen. Or it may
happen with an unexpected result.

That result is that the application will be reading the main buffer
instead of the snapshot buffer. That is because when the snapshot occurs,
the main and snapshot buffers are swapped. But the reader has a descriptor
still pointing to the buffer that it originally connected to.

This is fine for the main buffer readers, as they may be blocked waiting
for a watermark to be hit, and when a snapshot occurs, the data that the
main readers want is now on the snapshot buffer.

But for waiters of the snapshot buffer, they are waiting for an event to
occur that will trigger the snapshot and they can then consume it quickly
to save the snapshot before the next snapshot occurs. But to do this, they
need to read the new snapshot buffer, not the old one that is now
receiving new data.

Also, it does not make sense to have a watermark "buffer_percent" on the
snapshot buffer, as the snapshot buffer is static and does not receive new
data except all at once.

Cc: stable@vger.kernel.org
Fixes: debdd57f5145f ("tracing: Make a snapshot feature available from userspace")
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 kernel/trace/ring_buffer.c |  3 ++-
 kernel/trace/trace.c       | 20 +++++++++++++++++---
 2 files changed, 19 insertions(+), 4 deletions(-)

Comments

Masami Hiramatsu (Google) Dec. 29, 2023, 1:47 p.m. UTC | #1
On Thu, 28 Dec 2023 09:51:49 -0500
Steven Rostedt <rostedt@goodmis.org> wrote:

> From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
> 
> If an application blocks on the snapshot or snapshot_raw files, expecting
> to be woken up when a snapshot occurs, it will not happen. Or it may
> happen with an unexpected result.
> 
> That result is that the application will be reading the main buffer
> instead of the snapshot buffer. That is because when the snapshot occurs,
> the main and snapshot buffers are swapped. But the reader has a descriptor
> still pointing to the buffer that it originally connected to.
> 
> This is fine for the main buffer readers, as they may be blocked waiting
> for a watermark to be hit, and when a snapshot occurs, the data that the
> main readers want is now on the snapshot buffer.
> 
> But for waiters of the snapshot buffer, they are waiting for an event to
> occur that will trigger the snapshot and they can then consume it quickly
> to save the snapshot before the next snapshot occurs. But to do this, they
> need to read the new snapshot buffer, not the old one that is now
> receiving new data.
> 
> Also, it does not make sense to have a watermark "buffer_percent" on the
> snapshot buffer, as the snapshot buffer is static and does not receive new
> data except all at once.

This looks good to me.

Acked-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>

Thank you!

> 
> Cc: stable@vger.kernel.org
> Fixes: debdd57f5145f ("tracing: Make a snapshot feature available from userspace")
> Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
> ---
>  kernel/trace/ring_buffer.c |  3 ++-
>  kernel/trace/trace.c       | 20 +++++++++++++++++---
>  2 files changed, 19 insertions(+), 4 deletions(-)
> 
> diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
> index e476d42c84c5..07dae67424a9 100644
> --- a/kernel/trace/ring_buffer.c
> +++ b/kernel/trace/ring_buffer.c
> @@ -838,7 +838,8 @@ void ring_buffer_wake_waiters(struct trace_buffer *buffer, int cpu)
>  	/* make sure the waiters see the new index */
>  	smp_wmb();
>  
> -	rb_wake_up_waiters(&rbwork->work);
> +	/* This can be called in any context */
> +	irq_work_queue(&rbwork->work);
>  }
>  
>  /**
> diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
> index cfeaf2cd204e..606787edae8c 100644
> --- a/kernel/trace/trace.c
> +++ b/kernel/trace/trace.c
> @@ -1902,6 +1902,9 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
>  	__update_max_tr(tr, tsk, cpu);
>  
>  	arch_spin_unlock(&tr->max_lock);
> +
> +	/* Any waiters on the old snapshot buffer need to wake up */
> +	ring_buffer_wake_waiters(tr->array_buffer.buffer, RING_BUFFER_ALL_CPUS);
>  }
>  
>  /**
> @@ -1953,12 +1956,23 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
>  
>  static int wait_on_pipe(struct trace_iterator *iter, int full)
>  {
> +	int ret;
> +
>  	/* Iterators are static, they should be filled or empty */
>  	if (trace_buffer_iter(iter, iter->cpu_file))
>  		return 0;
>  
> -	return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
> -				full);
> +	ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full);
> +
> +#ifdef CONFIG_TRACER_MAX_TRACE
> +	/*
> +	 * Make sure this is still the snapshot buffer, as if a snapshot were
> +	 * to happen, this would now be the main buffer.
> +	 */
> +	if (iter->snapshot)
> +		iter->array_buffer = &iter->tr->max_buffer;
> +#endif
> +	return ret;
>  }
>  
>  #ifdef CONFIG_FTRACE_STARTUP_TEST
> @@ -8560,7 +8574,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos,
>  
>  		wait_index = READ_ONCE(iter->wait_index);
>  
> -		ret = wait_on_pipe(iter, iter->tr->buffer_percent);
> +		ret = wait_on_pipe(iter, iter->snapshot ? 0 : iter->tr->buffer_percent);
>  		if (ret)
>  			goto out;
>  
> -- 
> 2.42.0
>
diff mbox series

Patch

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index e476d42c84c5..07dae67424a9 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -838,7 +838,8 @@  void ring_buffer_wake_waiters(struct trace_buffer *buffer, int cpu)
 	/* make sure the waiters see the new index */
 	smp_wmb();
 
-	rb_wake_up_waiters(&rbwork->work);
+	/* This can be called in any context */
+	irq_work_queue(&rbwork->work);
 }
 
 /**
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index cfeaf2cd204e..606787edae8c 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1902,6 +1902,9 @@  update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
 	__update_max_tr(tr, tsk, cpu);
 
 	arch_spin_unlock(&tr->max_lock);
+
+	/* Any waiters on the old snapshot buffer need to wake up */
+	ring_buffer_wake_waiters(tr->array_buffer.buffer, RING_BUFFER_ALL_CPUS);
 }
 
 /**
@@ -1953,12 +1956,23 @@  update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
 
 static int wait_on_pipe(struct trace_iterator *iter, int full)
 {
+	int ret;
+
 	/* Iterators are static, they should be filled or empty */
 	if (trace_buffer_iter(iter, iter->cpu_file))
 		return 0;
 
-	return ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file,
-				full);
+	ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full);
+
+#ifdef CONFIG_TRACER_MAX_TRACE
+	/*
+	 * Make sure this is still the snapshot buffer, as if a snapshot were
+	 * to happen, this would now be the main buffer.
+	 */
+	if (iter->snapshot)
+		iter->array_buffer = &iter->tr->max_buffer;
+#endif
+	return ret;
 }
 
 #ifdef CONFIG_FTRACE_STARTUP_TEST
@@ -8560,7 +8574,7 @@  tracing_buffers_splice_read(struct file *file, loff_t *ppos,
 
 		wait_index = READ_ONCE(iter->wait_index);
 
-		ret = wait_on_pipe(iter, iter->tr->buffer_percent);
+		ret = wait_on_pipe(iter, iter->snapshot ? 0 : iter->tr->buffer_percent);
 		if (ret)
 			goto out;