diff mbox series

[4/6] tracing: Fix waking up tracing readers

Message ID 20240308184007.805898590@goodmis.org (mailing list archive)
State Superseded
Headers show
Series tracing/ring-buffer: Fix wakeup of ring buffer waiters | expand

Commit Message

Steven Rostedt March 8, 2024, 6:38 p.m. UTC
From: "Steven Rostedt (Google)" <rostedt@goodmis.org>

When the tracing_pipe_raw file is closed, if there are readers still
blocked on it, they need to be woken up. Currently a wait_index is used.
When the readers need to be woken, the index is updated and they are all
woken up.

But there is a race where a new reader could be coming in just as the file
is being closed, and it could still block if the wake up happens just
before the reader enters the wait.

Add another field called "waking" and wrap both the waking and wait_index
around a new wait_mutex to synchronize them.

When a reader comes in, it will save the current wait_index, but if waking
is set, then it will not block no matter what wait_index is.

After it wakes from the wait, if either the waking is set or the
wait_index is not the same as what it read before, then it will not block.

The waker will set waking and increment the wait_index. For the .flush()
function, it will not clear waking so that all new readers must not block.

There's an ioctl() that kicks all current waiters, but does not care about
new waiters. It will set the waking count back to what it was when it came
in.

There's still a race with the wait_on_pipe() with respect to the
ring_buffer_wait(), but that will be dealt with separately.

Link: https://lore.kernel.org/all/CAHk-=whs5MdtNjzFkTyaUy=vHi=qwWgPi0JgTe6OYUYMNSRZfg@mail.gmail.com/

Cc: stable@vger.kernel.org
Fixes: f3ddb74ad0790 ("tracing: Wake up ring buffer waiters on closing of the file")
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 include/linux/trace_events.h |   3 +-
 kernel/trace/trace.c         | 101 +++++++++++++++++++++++++++++------
 2 files changed, 86 insertions(+), 18 deletions(-)

Comments

Steven Rostedt March 8, 2024, 7:13 p.m. UTC | #1
On Fri, 08 Mar 2024 13:38:20 -0500
Steven Rostedt <rostedt@goodmis.org> wrote:

> +static DEFINE_MUTEX(wait_mutex);
> +
> +static bool wait_woken_prepare(struct trace_iterator *iter, int *wait_index)
> +{
> +	bool woken = false;
> +
> +	mutex_lock(&wait_mutex);
> +	if (iter->waking)
> +		woken = true;
> +	*wait_index = iter->wait_index;
> +	mutex_unlock(&wait_mutex);
> +
> +	return woken;
> +}

The last patch adds this code after a prepare_to_wait(), which triggered
the warning:

  do not call blocking ops when !TASK_RUNNING; state=1 set at [<00000000797e3e20>] prepare_to_wait+0x48/0xf0

Which is correct. The prepare_to_wait() set task state to
TASK_INTERRUPTIBLE, so I can not call a mutex after that.

I'll send a v2 where I switch this over to spin locks.

-- Steve


> +
> +static bool wait_woken_check(struct trace_iterator *iter, int *wait_index)
> +{
> +	bool woken = false;
> +
> +	mutex_lock(&wait_mutex);
> +	if (iter->waking || *wait_index != iter->wait_index)
> +		woken = true;
> +	mutex_unlock(&wait_mutex);
> +
> +	return woken;
> +}
> +
> +static void wait_woken_set(struct trace_iterator *iter)
> +{
> +	mutex_lock(&wait_mutex);
> +	iter->waking++;
> +	iter->wait_index++;
> +	mutex_unlock(&wait_mutex);
> +}
> +
> +static void wait_woken_clear(struct trace_iterator *iter)
> +{
> +	mutex_lock(&wait_mutex);
> +	iter->waking--;
> +	mutex_unlock(&wait_mutex);
> +}
> +
diff mbox series

Patch

diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index d68ff9b1247f..adf8e163a7be 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -103,7 +103,8 @@  struct trace_iterator {
 	unsigned int		temp_size;
 	char			*fmt;	/* modified format holder */
 	unsigned int		fmt_size;
-	long			wait_index;
+	int			wait_index;
+	int			waking;	/* set by a waker */
 
 	/* trace_seq for __print_flags() and __print_symbolic() etc. */
 	struct trace_seq	tmp_seq;
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index c9c898307348..4e8f6cdeafd5 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1955,6 +1955,65 @@  update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
 
 #endif /* CONFIG_TRACER_MAX_TRACE */
 
+/*
+ * In order to wake up readers and have them return back to user space,
+ * the iterator has two counters:
+ *
+ *  wait_index - always increases every time a waker wakes up the readers.
+ *  waking - Set by the waker when waking and cleared afterward.
+ *
+ * Both are protected together with the wait_mutex.
+ * When waking, the lock is taken and both indexes are incremented.
+ * The reader will first prepare the wait by taking the lock,
+ * if waking is set, it will sleep regardless of what wait_index is.
+ * Then after it sleeps it checks if wait_index has been updated
+ * and if it has, it will not sleep again.
+ *
+ * Note, if wait_woken_clear() is not called, then all new readers
+ * will not sleep (this happens in closing the file).
+ */
+static DEFINE_MUTEX(wait_mutex);
+
+static bool wait_woken_prepare(struct trace_iterator *iter, int *wait_index)
+{
+	bool woken = false;
+
+	mutex_lock(&wait_mutex);
+	if (iter->waking)
+		woken = true;
+	*wait_index = iter->wait_index;
+	mutex_unlock(&wait_mutex);
+
+	return woken;
+}
+
+static bool wait_woken_check(struct trace_iterator *iter, int *wait_index)
+{
+	bool woken = false;
+
+	mutex_lock(&wait_mutex);
+	if (iter->waking || *wait_index != iter->wait_index)
+		woken = true;
+	mutex_unlock(&wait_mutex);
+
+	return woken;
+}
+
+static void wait_woken_set(struct trace_iterator *iter)
+{
+	mutex_lock(&wait_mutex);
+	iter->waking++;
+	iter->wait_index++;
+	mutex_unlock(&wait_mutex);
+}
+
+static void wait_woken_clear(struct trace_iterator *iter)
+{
+	mutex_lock(&wait_mutex);
+	iter->waking--;
+	mutex_unlock(&wait_mutex);
+}
+
 static int wait_on_pipe(struct trace_iterator *iter, int full)
 {
 	int ret;
@@ -8312,9 +8371,11 @@  tracing_buffers_read(struct file *filp, char __user *ubuf,
 	struct ftrace_buffer_info *info = filp->private_data;
 	struct trace_iterator *iter = &info->iter;
 	void *trace_data;
+	int wait_index;
 	int page_size;
 	ssize_t ret = 0;
 	ssize_t size;
+	bool woken;
 
 	if (!count)
 		return 0;
@@ -8353,6 +8414,7 @@  tracing_buffers_read(struct file *filp, char __user *ubuf,
 	if (info->read < page_size)
 		goto read;
 
+	woken = wait_woken_prepare(iter, &wait_index);
  again:
 	trace_access_lock(iter->cpu_file);
 	ret = ring_buffer_read_page(iter->array_buffer->buffer,
@@ -8362,7 +8424,7 @@  tracing_buffers_read(struct file *filp, char __user *ubuf,
 	trace_access_unlock(iter->cpu_file);
 
 	if (ret < 0) {
-		if (trace_empty(iter)) {
+		if (trace_empty(iter) && !woken) {
 			if ((filp->f_flags & O_NONBLOCK))
 				return -EAGAIN;
 
@@ -8370,6 +8432,8 @@  tracing_buffers_read(struct file *filp, char __user *ubuf,
 			if (ret)
 				return ret;
 
+			woken = wait_woken_check(iter, &wait_index);
+
 			goto again;
 		}
 		return 0;
@@ -8398,12 +8462,14 @@  static int tracing_buffers_flush(struct file *file, fl_owner_t id)
 	struct ftrace_buffer_info *info = file->private_data;
 	struct trace_iterator *iter = &info->iter;
 
-	iter->wait_index++;
-	/* Make sure the waiters see the new wait_index */
-	smp_wmb();
+	wait_woken_set(iter);
 
 	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
 
+	/*
+	 * Do not call wait_woken_clear(), as the file is being closed.
+	 * this will prevent any new readers from sleeping.
+	 */
 	return 0;
 }
 
@@ -8500,9 +8566,11 @@  tracing_buffers_splice_read(struct file *file, loff_t *ppos,
 		.spd_release	= buffer_spd_release,
 	};
 	struct buffer_ref *ref;
+	int wait_index;
 	int page_size;
 	int entries, i;
 	ssize_t ret = 0;
+	bool woken = false;
 
 #ifdef CONFIG_TRACER_MAX_TRACE
 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
@@ -8522,6 +8590,7 @@  tracing_buffers_splice_read(struct file *file, loff_t *ppos,
 	if (splice_grow_spd(pipe, &spd))
 		return -ENOMEM;
 
+	woken = wait_woken_prepare(iter, &wait_index);
  again:
 	trace_access_lock(iter->cpu_file);
 	entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
@@ -8573,17 +8642,17 @@  tracing_buffers_splice_read(struct file *file, loff_t *ppos,
 
 	/* did we read anything? */
 	if (!spd.nr_pages) {
-		long wait_index;
 
 		if (ret)
 			goto out;
 
+		if (woken)
+			goto out;
+
 		ret = -EAGAIN;
 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
 			goto out;
 
-		wait_index = READ_ONCE(iter->wait_index);
-
 		ret = wait_on_pipe(iter, iter->snapshot ? 0 : iter->tr->buffer_percent);
 		if (ret)
 			goto out;
@@ -8592,10 +8661,7 @@  tracing_buffers_splice_read(struct file *file, loff_t *ppos,
 		if (!tracer_tracing_is_on(iter->tr))
 			goto out;
 
-		/* Make sure we see the new wait_index */
-		smp_rmb();
-		if (wait_index != iter->wait_index)
-			goto out;
+		woken = wait_woken_check(iter, &wait_index);
 
 		goto again;
 	}
@@ -8616,15 +8682,16 @@  static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned
 	if (cmd)
 		return -ENOIOCTLCMD;
 
-	mutex_lock(&trace_types_lock);
-
-	iter->wait_index++;
-	/* Make sure the waiters see the new wait_index */
-	smp_wmb();
+	wait_woken_set(iter);
 
 	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
 
-	mutex_unlock(&trace_types_lock);
+	/*
+	 * This just kicks existing readers, a new reader coming in may
+	 * still sleep.
+	 */
+	wait_woken_clear(iter);
+
 	return 0;
 }