diff mbox series

[6/6] tracing/ring-buffer: Fix wait_on_pipe() race

Message ID 20240308184008.120434979@goodmis.org (mailing list archive)
State Superseded
Headers show
Series tracing/ring-buffer: Fix wakeup of ring buffer waiters | expand

Commit Message

Steven Rostedt March 8, 2024, 6:38 p.m. UTC
From: "Steven Rostedt (Google)" <rostedt@goodmis.org>

When the trace_pipe_raw file is closed, there should be no new readers on
the file descriptor. This is mostly handled with the waking and wait_index
fields of the iterator. But there's still a slight race.

     CPU 0				CPU 1
     -----				-----
 wait_woken_prepare()
    if (waking)
       woken = true;
    index = wait_index;
				   wait_woken_set()
				      waking = true
				      wait_index++;
				   ring_buffer_wake_waiters();
 wait_on_pipe()
    ring_buffer_wait();

The ring_buffer_wait() will miss the wakeup from CPU 1. The problem is
that the ring_buffer_wait() needs the logic of:

	prepare_to_wait();
	if (!condition)
		schedule();

Where the missing condition check is the iter->waking.

Either that condition check needs to be passed to ring_buffer_wait() or
the function needs to be broken up into three parts. This chooses to do
the break up.

Break ring_buffer_wait() into:

	ring_buffer_prepare_to_wait();
	ring_buffer_wait();
	ring_buffer_finish_wait();

Now wait_on_pipe() can have:

	ring_buffer_prepare_to_wait();
	if (!iter->waking)
		ring_buffer_wait();
	ring_buffer_finish_wait();

And this will catch the above race, as the waiter will either see waking,
or already have been woken up.

Link: https://lore.kernel.org/all/CAHk-=whs5MdtNjzFkTyaUy=vHi=qwWgPi0JgTe6OYUYMNSRZfg@mail.gmail.com/

Cc: stable@vger.kernel.org
Fixes: f3ddb74ad0790 ("tracing: Wake up ring buffer waiters on closing of the file")
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 include/linux/ring_buffer.h |  4 ++
 kernel/trace/ring_buffer.c  | 88 ++++++++++++++++++++++++++-----------
 kernel/trace/trace.c        | 14 +++++-
 3 files changed, 78 insertions(+), 28 deletions(-)
diff mbox series

Patch

diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index fa802db216f9..e5b5903cdc21 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -98,7 +98,11 @@  __ring_buffer_alloc(unsigned long size, unsigned flags, struct lock_class_key *k
 	__ring_buffer_alloc((size), (flags), &__key);	\
 })
 
+int ring_buffer_prepare_to_wait(struct trace_buffer *buffer, int cpu, int *full,
+				struct wait_queue_entry *wait);
 int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full);
+void ring_buffer_finish_wait(struct trace_buffer *buffer, int cpu, int full,
+				 struct wait_queue_entry *wait);
 __poll_t ring_buffer_poll_wait(struct trace_buffer *buffer, int cpu,
 			  struct file *filp, poll_table *poll_table, int full);
 void ring_buffer_wake_waiters(struct trace_buffer *buffer, int cpu);
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 856d0e5b0da5..fa7090f6b4fc 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -868,29 +868,29 @@  rb_get_work_queue(struct trace_buffer *buffer, int cpu, int *full)
 }
 
 /**
- * ring_buffer_wait - wait for input to the ring buffer
+ * ring_buffer_prepare_to_wait - Prepare to wait for data on the ring buffer
  * @buffer: buffer to wait on
  * @cpu: the cpu buffer to wait on
- * @full: wait until the percentage of pages are available, if @cpu != RING_BUFFER_ALL_CPUS
+ * @full: wait until the percentage of pages are available,
+ *         if @cpu != RING_BUFFER_ALL_CPUS. It may be updated via this function.
+ * @wait: The wait queue entry.
  *
- * If @cpu == RING_BUFFER_ALL_CPUS then the task will wake up as soon
- * as data is added to any of the @buffer's cpu buffers. Otherwise
- * it will wait for data to be added to a specific cpu buffer.
+ * This must be called before ring_buffer_wait(). It calls the prepare_to_wait()
+ * on @wait for the necessary wait queue defined by @buffer, @cpu, and @full.
  */
-int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full)
+int ring_buffer_prepare_to_wait(struct trace_buffer *buffer, int cpu, int *full,
+				 struct wait_queue_entry *wait)
 {
 	struct rb_irq_work *rbwork;
-	DEFINE_WAIT(wait);
-	int ret = 0;
 
-	rbwork = rb_get_work_queue(buffer, cpu, &full);
+	rbwork = rb_get_work_queue(buffer, cpu, full);
 	if (IS_ERR(rbwork))
 		return PTR_ERR(rbwork);
 
-	if (full)
-		prepare_to_wait(&rbwork->full_waiters, &wait, TASK_INTERRUPTIBLE);
+	if (*full)
+		prepare_to_wait(&rbwork->full_waiters, wait, TASK_INTERRUPTIBLE);
 	else
-		prepare_to_wait(&rbwork->waiters, &wait, TASK_INTERRUPTIBLE);
+		prepare_to_wait(&rbwork->waiters, wait, TASK_INTERRUPTIBLE);
 
 	/*
 	 * The events can happen in critical sections where
@@ -912,30 +912,66 @@  int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full)
 	 * that is necessary is that the wake up happens after
 	 * a task has been queued. It's OK for spurious wake ups.
 	 */
-	if (full)
+	if (*full)
 		rbwork->full_waiters_pending = true;
 	else
 		rbwork->waiters_pending = true;
 
-	if (rb_watermark_hit(buffer, cpu, full))
-		goto out;
+	return 0;
+}
 
-	if (signal_pending(current)) {
-		ret = -EINTR;
-		goto out;
-	}
+/**
+ * ring_buffer_finish_wait - clean up of ring_buffer_prepare_to_wait()
+ * @buffer: buffer to wait on
+ * @cpu: the cpu buffer to wait on
+ * @full: wait until the percentage of pages are available, if @cpu != RING_BUFFER_ALL_CPUS
+ * @wait: The wait queue entry.
+ *
+ * This must be called after ring_buffer_prepare_to_wait(). It cleans up
+ * the @wait for the queue defined by @buffer, @cpu, and @full.
+ */
+void ring_buffer_finish_wait(struct trace_buffer *buffer, int cpu, int full,
+				 struct wait_queue_entry *wait)
+{
+	struct rb_irq_work *rbwork;
+
+	rbwork = rb_get_work_queue(buffer, cpu, &full);
+	if (WARN_ON_ONCE(IS_ERR(rbwork)))
+		return;
 
-	schedule();
- out:
 	if (full)
-		finish_wait(&rbwork->full_waiters, &wait);
+		finish_wait(&rbwork->full_waiters, wait);
 	else
-		finish_wait(&rbwork->waiters, &wait);
+		finish_wait(&rbwork->waiters, wait);
+}
 
-	if (!ret && !rb_watermark_hit(buffer, cpu, full) && signal_pending(current))
-		ret = -EINTR;
+/**
+ * ring_buffer_wait - wait for input to the ring buffer
+ * @buffer: buffer to wait on
+ * @cpu: the cpu buffer to wait on
+ * @full: wait until the percentage of pages are available, if @cpu != RING_BUFFER_ALL_CPUS
+ *
+ * If @cpu == RING_BUFFER_ALL_CPUS then the task will wake up as soon
+ * as data is added to any of the @buffer's cpu buffers. Otherwise
+ * it will wait for data to be added to a specific cpu buffer.
+ *
+ * ring_buffer_prepare_to_wait() must be called before this function
+ * and ring_buffer_finish_wait() must be called after.
+ */
+int ring_buffer_wait(struct trace_buffer *buffer, int cpu, int full)
+{
+	if (rb_watermark_hit(buffer, cpu, full))
+		return 0;
 
-	return ret;
+	if (signal_pending(current))
+		return -EINTR;
+
+	schedule();
+
+	if (!rb_watermark_hit(buffer, cpu, full) && signal_pending(current))
+		return -EINTR;
+
+	return 0;
 }
 
 /**
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 4e8f6cdeafd5..790ce3ba2acb 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1981,7 +1981,8 @@  static bool wait_woken_prepare(struct trace_iterator *iter, int *wait_index)
 	mutex_lock(&wait_mutex);
 	if (iter->waking)
 		woken = true;
-	*wait_index = iter->wait_index;
+	if (wait_index)
+		*wait_index = iter->wait_index;
 	mutex_unlock(&wait_mutex);
 
 	return woken;
@@ -2016,13 +2017,22 @@  static void wait_woken_clear(struct trace_iterator *iter)
 
 static int wait_on_pipe(struct trace_iterator *iter, int full)
 {
+	struct trace_buffer *buffer;
+	DEFINE_WAIT(wait);
 	int ret;
 
 	/* Iterators are static, they should be filled or empty */
 	if (trace_buffer_iter(iter, iter->cpu_file))
 		return 0;
 
-	ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full);
+	buffer = iter->array_buffer->buffer;
+
+	ret = ring_buffer_prepare_to_wait(buffer, iter->cpu_file, &full, &wait);
+	if (ret < 0)
+		return ret;
+	if (!wait_woken_prepare(iter, NULL))
+		ret = ring_buffer_wait(buffer, iter->cpu_file, full);
+	ring_buffer_finish_wait(buffer, iter->cpu_file, full, &wait);
 
 #ifdef CONFIG_TRACER_MAX_TRACE
 	/*