Message ID | 20230818022645.1948314-1-zhengyejian1@huawei.com (mailing list archive) |
---|---|
State | Accepted |
Commit | c2489bb7e6be2e8cdced12c16c42fa128403ac03 |
Headers | show |
Series | [v2] tracing: Introduce pipe_cpumask to avoid race on trace_pipes | expand |
On Fri, 18 Aug 2023 10:26:45 +0800 Zheng Yejian <zhengyejian1@huawei.com> wrote: > There is race issue when concurrently splice_read main trace_pipe and > per_cpu trace_pipes which will result in data read out being different > from what actually writen. > > As suggested by Steven: > > I believe we should add a ref count to trace_pipe and the per_cpu > > trace_pipes, where if they are opened, nothing else can read it. > > > > Opening trace_pipe locks all per_cpu ref counts, if any of them are > > open, then the trace_pipe open will fail (and releases any ref counts > > it had taken). > > > > Opening a per_cpu trace_pipe will up the ref count for just that > > CPU buffer. This will allow multiple tasks to read different per_cpu > > trace_pipe files, but will prevent the main trace_pipe file from > > being opened. > > But because we only need to know whether per_cpu trace_pipe is open or > not, using a cpumask instead of using ref count may be easier. > > After this patch, users will find that: > - Main trace_pipe can be opened by only one user, and if it is > opened, all per_cpu trace_pipes cannot be opened; > - Per_cpu trace_pipes can be opened by multiple users, but each per_cpu > trace_pipe can only be opened by one user. And if one of them is > opened, main trace_pipe cannot be opened. > > Suggested-by: Steven Rostedt (Google) <rostedt@goodmis.org> > Signed-off-by: Zheng Yejian <zhengyejian1@huawei.com> Looks good to me. Reviewed-by: Masami Hiramatsu (Google) <mhiramat@kernel.org> BTW, this reminds me that splice read support had been introduced for virtio-trace support. I've tested it with/without this patch, but I hit a different issue, which seems like a virtio-serial issue (per-cpu buffers on the guest are spliced correctly, but the data can not be read from host side). Let me investigate it. Thank you, > --- > > v2: > - In open_pipe_on_cpu(), clean format of if statements. > - In tracing_open_pipe(), call tracing_get_cpu() after > tracing_check_open_get_tr() and within trace_types_lock, > also keep "int ret;" as the last declaration. > Link: https://lore.kernel.org/all/20230817101331.21ab6b33@gandalf.local.home/ > > v1: > - Link: https://lore.kernel.org/all/20230817115057.1637676-1-zhengyejian1@huawei.com/ > > kernel/trace/trace.c | 55 ++++++++++++++++++++++++++++++++++++++------ > kernel/trace/trace.h | 2 ++ > 2 files changed, 50 insertions(+), 7 deletions(-) > > diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c > index b8870078ef58..c888a0a2c0e2 100644 > --- a/kernel/trace/trace.c > +++ b/kernel/trace/trace.c > @@ -6705,10 +6705,36 @@ tracing_max_lat_write(struct file *filp, const char __user *ubuf, > > #endif > > +static int open_pipe_on_cpu(struct trace_array *tr, int cpu) > +{ > + if (cpu == RING_BUFFER_ALL_CPUS) { > + if (cpumask_empty(tr->pipe_cpumask)) { > + cpumask_setall(tr->pipe_cpumask); > + return 0; > + } > + } else if (!cpumask_test_cpu(cpu, tr->pipe_cpumask)) { > + cpumask_set_cpu(cpu, tr->pipe_cpumask); > + return 0; > + } > + return -EBUSY; > +} > + > +static void close_pipe_on_cpu(struct trace_array *tr, int cpu) > +{ > + if (cpu == RING_BUFFER_ALL_CPUS) { > + WARN_ON(!cpumask_full(tr->pipe_cpumask)); > + cpumask_clear(tr->pipe_cpumask); > + } else { > + WARN_ON(!cpumask_test_cpu(cpu, tr->pipe_cpumask)); > + cpumask_clear_cpu(cpu, tr->pipe_cpumask); > + } > +} > + > static int tracing_open_pipe(struct inode *inode, struct file *filp) > { > struct trace_array *tr = inode->i_private; > struct trace_iterator *iter; > + int cpu; > int ret; > > ret = tracing_check_open_get_tr(tr); > @@ -6716,13 +6742,16 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp) > return ret; > > mutex_lock(&trace_types_lock); > + cpu = tracing_get_cpu(inode); > + ret = open_pipe_on_cpu(tr, cpu); > + if (ret) > + goto fail_pipe_on_cpu; > > /* create a buffer to store the information to pass to userspace */ > iter = kzalloc(sizeof(*iter), GFP_KERNEL); > if (!iter) { > ret = -ENOMEM; > - __trace_array_put(tr); > - goto out; > + goto fail_alloc_iter; > } > > trace_seq_init(&iter->seq); > @@ -6745,7 +6774,7 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp) > > iter->tr = tr; > iter->array_buffer = &tr->array_buffer; > - iter->cpu_file = tracing_get_cpu(inode); > + iter->cpu_file = cpu; > mutex_init(&iter->mutex); > filp->private_data = iter; > > @@ -6755,12 +6784,15 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp) > nonseekable_open(inode, filp); > > tr->trace_ref++; > -out: > + > mutex_unlock(&trace_types_lock); > return ret; > > fail: > kfree(iter); > +fail_alloc_iter: > + close_pipe_on_cpu(tr, cpu); > +fail_pipe_on_cpu: > __trace_array_put(tr); > mutex_unlock(&trace_types_lock); > return ret; > @@ -6777,7 +6809,7 @@ static int tracing_release_pipe(struct inode *inode, struct file *file) > > if (iter->trace->pipe_close) > iter->trace->pipe_close(iter); > - > + close_pipe_on_cpu(tr, iter->cpu_file); > mutex_unlock(&trace_types_lock); > > free_cpumask_var(iter->started); > @@ -9441,6 +9473,9 @@ static struct trace_array *trace_array_create(const char *name) > if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL)) > goto out_free_tr; > > + if (!alloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL)) > + goto out_free_tr; > + > tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS; > > cpumask_copy(tr->tracing_cpumask, cpu_all_mask); > @@ -9482,6 +9517,7 @@ static struct trace_array *trace_array_create(const char *name) > out_free_tr: > ftrace_free_ftrace_ops(tr); > free_trace_buffers(tr); > + free_cpumask_var(tr->pipe_cpumask); > free_cpumask_var(tr->tracing_cpumask); > kfree(tr->name); > kfree(tr); > @@ -9584,6 +9620,7 @@ static int __remove_instance(struct trace_array *tr) > } > kfree(tr->topts); > > + free_cpumask_var(tr->pipe_cpumask); > free_cpumask_var(tr->tracing_cpumask); > kfree(tr->name); > kfree(tr); > @@ -10381,12 +10418,14 @@ __init static int tracer_alloc_buffers(void) > if (trace_create_savedcmd() < 0) > goto out_free_temp_buffer; > > + if (!alloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL)) > + goto out_free_savedcmd; > + > /* TODO: make the number of buffers hot pluggable with CPUS */ > if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) { > MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n"); > - goto out_free_savedcmd; > + goto out_free_pipe_cpumask; > } > - > if (global_trace.buffer_disabled) > tracing_off(); > > @@ -10439,6 +10478,8 @@ __init static int tracer_alloc_buffers(void) > > return 0; > > +out_free_pipe_cpumask: > + free_cpumask_var(global_trace.pipe_cpumask); > out_free_savedcmd: > free_saved_cmdlines_buffer(savedcmd); > out_free_temp_buffer: > diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h > index e1edc2197fc8..53ac0f7780c2 100644 > --- a/kernel/trace/trace.h > +++ b/kernel/trace/trace.h > @@ -377,6 +377,8 @@ struct trace_array { > struct list_head events; > struct trace_event_file *trace_marker_file; > cpumask_var_t tracing_cpumask; /* only trace on set CPUs */ > + /* one per_cpu trace_pipe can be opened by only one user */ > + cpumask_var_t pipe_cpumask; > int ref; > int trace_ref; > #ifdef CONFIG_FUNCTION_TRACER > -- > 2.25.1 >
On Fri, 18 Aug 2023 14:03:09 +0900 Masami Hiramatsu (Google) <mhiramat@kernel.org> wrote: > Looks good to me. > > Reviewed-by: Masami Hiramatsu (Google) <mhiramat@kernel.org> > > BTW, this reminds me that splice read support had been introduced for > virtio-trace support. I've tested it with/without this patch, but I hit > a different issue, which seems like a virtio-serial issue (per-cpu buffers > on the guest are spliced correctly, but the data can not be read from host > side). Let me investigate it. Is virtio-trace using trace_pipe or trace_pipe_raw? Those two are handled differently. -- Steve
On Fri, 18 Aug 2023 09:41:28 -0400 Steven Rostedt <rostedt@goodmis.org> wrote: > On Fri, 18 Aug 2023 14:03:09 +0900 > Masami Hiramatsu (Google) <mhiramat@kernel.org> wrote: > > > Looks good to me. > > > > Reviewed-by: Masami Hiramatsu (Google) <mhiramat@kernel.org> > > > > BTW, this reminds me that splice read support had been introduced for > > virtio-trace support. I've tested it with/without this patch, but I hit > > a different issue, which seems like a virtio-serial issue (per-cpu buffers > > on the guest are spliced correctly, but the data can not be read from host > > side). Let me investigate it. > > Is virtio-trace using trace_pipe or trace_pipe_raw? Those two are handled > differently. It uses trace_pipe_raw. I guess if splice(from trace_pipe_raw to virtio-serial) returns -1 and errno == EAGAIN, the trace data will be lost? Thank you, > > -- Steve
On Fri, 18 Aug 2023 23:23:01 +0900 Masami Hiramatsu (Google) <mhiramat@kernel.org> wrote: > It uses trace_pipe_raw. I guess if splice(from trace_pipe_raw to virtio-serial) > returns -1 and errno == EAGAIN, the trace data will be lost? It shouldn't. If it does, then there's likely a bug. The code will block and if an interrupt comes in it will return immediately without reading from the buffer. https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/kernel/trace/trace.c#n8262 I don't see where it would return -EINTR and consume data, but I may be missing something. -- Steve
On Fri, 18 Aug 2023 11:53:22 -0400 Steven Rostedt <rostedt@goodmis.org> wrote: > On Fri, 18 Aug 2023 23:23:01 +0900 > Masami Hiramatsu (Google) <mhiramat@kernel.org> wrote: > > > It uses trace_pipe_raw. I guess if splice(from trace_pipe_raw to virtio-serial) > > returns -1 and errno == EAGAIN, the trace data will be lost? > > It shouldn't. If it does, then there's likely a bug. The code will block > and if an interrupt comes in it will return immediately without reading > from the buffer. > > https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/kernel/trace/trace.c#n8262 > > I don't see where it would return -EINTR and consume data, but I may be > missing something. Hmm, I suspect the case if the spilice_to_pipe() returns -EAGAIN. https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/kernel/trace/trace.c#n8491 It seems not handling such case. Anyway, I also think something wrong in virtio-serial (or misusing?), since it can not read anything from the host sometimes. I just setup the virtio-trace with below patch (ignore EAGAIN). From 92242480285448360c9390a743ea7b3751bb3e61 Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" <mhiramat@kernel.org> Date: Thu, 17 Aug 2023 14:08:40 +0900 Subject: [PATCH 1/3] tools/virtio-trace: Ignore EAGAIN error on splice() splice() can return EAGAIN error instead of returning 0 size read. In that case, wait a while and try to call splice() again. Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org> --- tools/virtio/virtio-trace/trace-agent-rw.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tools/virtio/virtio-trace/trace-agent-rw.c b/tools/virtio/virtio-trace/trace-agent-rw.c index ddfe7875eb16..e8a4c4f0c499 100644 --- a/tools/virtio/virtio-trace/trace-agent-rw.c +++ b/tools/virtio/virtio-trace/trace-agent-rw.c @@ -8,6 +8,7 @@ */ #define _GNU_SOURCE +#include <errno.h> #include <fcntl.h> #include <stdio.h> #include <stdlib.h> @@ -127,10 +128,10 @@ static void *rw_thread_main(void *thread_info) rlen = splice(ts->in_fd, NULL, ts->read_pipe, NULL, ts->pipe_size, SPLICE_F_MOVE | SPLICE_F_MORE); - if (rlen < 0) { - pr_err("Splice_read in rw-thread(%d)\n", ts->cpu_num); + if (rlen < 0 && errno != EAGAIN) { + pr_err("Splice_read error (%d) in rw-thread(%d)\n", errno, ts->cpu_num); goto error; - } else if (rlen == 0) { + } else if (rlen == 0 || errno == EAGAIN) { /* * If trace data do not exist or are unreadable not * for exceeding the page size, splice_read returns
On Sat, 19 Aug 2023 10:42:57 +0900 Masami Hiramatsu (Google) <mhiramat@kernel.org> wrote: > On Fri, 18 Aug 2023 11:53:22 -0400 > Steven Rostedt <rostedt@goodmis.org> wrote: > > > On Fri, 18 Aug 2023 23:23:01 +0900 > > Masami Hiramatsu (Google) <mhiramat@kernel.org> wrote: > > > > > It uses trace_pipe_raw. I guess if splice(from trace_pipe_raw to virtio-serial) > > > returns -1 and errno == EAGAIN, the trace data will be lost? > > > > It shouldn't. If it does, then there's likely a bug. The code will block > > and if an interrupt comes in it will return immediately without reading > > from the buffer. > > > > https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/kernel/trace/trace.c#n8262 > > > > I don't see where it would return -EINTR and consume data, but I may be > > missing something. > > Hmm, I suspect the case if the spilice_to_pipe() returns -EAGAIN. > > https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/kernel/trace/trace.c#n8491 > > It seems not handling such case. > > Anyway, I also think something wrong in virtio-serial (or misusing?), since > it can not read anything from the host sometimes. I just setup the virtio-trace > with below patch (ignore EAGAIN). Hmm, I couldn't reproduce it. (maybe a host security update change something?) Anyway, I confirmed that the ring buffer pages will not be consumed unless splice_to_pipe() succeeded. Thank you, > > > From 92242480285448360c9390a743ea7b3751bb3e61 Mon Sep 17 00:00:00 2001 > From: "Masami Hiramatsu (Google)" <mhiramat@kernel.org> > Date: Thu, 17 Aug 2023 14:08:40 +0900 > Subject: [PATCH 1/3] tools/virtio-trace: Ignore EAGAIN error on splice() > > splice() can return EAGAIN error instead of returning 0 size read. > In that case, wait a while and try to call splice() again. > > Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org> > --- > tools/virtio/virtio-trace/trace-agent-rw.c | 7 ++++--- > 1 file changed, 4 insertions(+), 3 deletions(-) > > diff --git a/tools/virtio/virtio-trace/trace-agent-rw.c b/tools/virtio/virtio-trace/trace-agent-rw.c > index ddfe7875eb16..e8a4c4f0c499 100644 > --- a/tools/virtio/virtio-trace/trace-agent-rw.c > +++ b/tools/virtio/virtio-trace/trace-agent-rw.c > @@ -8,6 +8,7 @@ > */ > > #define _GNU_SOURCE > +#include <errno.h> > #include <fcntl.h> > #include <stdio.h> > #include <stdlib.h> > @@ -127,10 +128,10 @@ static void *rw_thread_main(void *thread_info) > rlen = splice(ts->in_fd, NULL, ts->read_pipe, NULL, > ts->pipe_size, SPLICE_F_MOVE | SPLICE_F_MORE); > > - if (rlen < 0) { > - pr_err("Splice_read in rw-thread(%d)\n", ts->cpu_num); > + if (rlen < 0 && errno != EAGAIN) { > + pr_err("Splice_read error (%d) in rw-thread(%d)\n", errno, ts->cpu_num); > goto error; > - } else if (rlen == 0) { > + } else if (rlen == 0 || errno == EAGAIN) { > /* > * If trace data do not exist or are unreadable not > * for exceeding the page size, splice_read returns > -- > 2.34.1 > > > -- > Masami Hiramatsu (Google) <mhiramat@kernel.org>
On Sat, 19 Aug 2023 10:42:57 +0900 Masami Hiramatsu (Google) <mhiramat@kernel.org> wrote: > On Fri, 18 Aug 2023 11:53:22 -0400 > Steven Rostedt <rostedt@goodmis.org> wrote: > > > On Fri, 18 Aug 2023 23:23:01 +0900 > > Masami Hiramatsu (Google) <mhiramat@kernel.org> wrote: > > > > > It uses trace_pipe_raw. I guess if splice(from trace_pipe_raw to virtio-serial) > > > returns -1 and errno == EAGAIN, the trace data will be lost? > > > > It shouldn't. If it does, then there's likely a bug. The code will block > > and if an interrupt comes in it will return immediately without reading > > from the buffer. > > > > https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/kernel/trace/trace.c#n8262 > > > > I don't see where it would return -EINTR and consume data, but I may be > > missing something. > > Hmm, I suspect the case if the spilice_to_pipe() returns -EAGAIN. > > https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/kernel/trace/trace.c#n8491 > > It seems not handling such case. > > Anyway, I also think something wrong in virtio-serial (or misusing?), since > it can not read anything from the host sometimes. I just setup the virtio-trace > with below patch (ignore EAGAIN). > > > From 92242480285448360c9390a743ea7b3751bb3e61 Mon Sep 17 00:00:00 2001 > From: "Masami Hiramatsu (Google)" <mhiramat@kernel.org> > Date: Thu, 17 Aug 2023 14:08:40 +0900 > Subject: [PATCH 1/3] tools/virtio-trace: Ignore EAGAIN error on splice() > > splice() can return EAGAIN error instead of returning 0 size read. > In that case, wait a while and try to call splice() again. > > Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org> > --- > tools/virtio/virtio-trace/trace-agent-rw.c | 7 ++++--- > 1 file changed, 4 insertions(+), 3 deletions(-) > > diff --git a/tools/virtio/virtio-trace/trace-agent-rw.c b/tools/virtio/virtio-trace/trace-agent-rw.c > index ddfe7875eb16..e8a4c4f0c499 100644 > --- a/tools/virtio/virtio-trace/trace-agent-rw.c > +++ b/tools/virtio/virtio-trace/trace-agent-rw.c > @@ -8,6 +8,7 @@ > */ > > #define _GNU_SOURCE > +#include <errno.h> > #include <fcntl.h> > #include <stdio.h> > #include <stdlib.h> > @@ -127,10 +128,10 @@ static void *rw_thread_main(void *thread_info) > rlen = splice(ts->in_fd, NULL, ts->read_pipe, NULL, > ts->pipe_size, SPLICE_F_MOVE | SPLICE_F_MORE); > > - if (rlen < 0) { > - pr_err("Splice_read in rw-thread(%d)\n", ts->cpu_num); > + if (rlen < 0 && errno != EAGAIN) { > + pr_err("Splice_read error (%d) in rw-thread(%d)\n", errno, ts->cpu_num); > goto error; > - } else if (rlen == 0) { > + } else if (rlen == 0 || errno == EAGAIN) { Ah, this caused a drop. errno can be EAGAIN even if rlen > 0. I've fixed this and that works. BTW, I think this virtio-trace would be better to move under tools/tracing because it is a tracing tool. Thanks, > /* > * If trace data do not exist or are unreadable not > * for exceeding the page size, splice_read returns > -- > 2.34.1 > > > -- > Masami Hiramatsu (Google) <mhiramat@kernel.org>
On Mon, 21 Aug 2023 11:19:54 +0900 Masami Hiramatsu (Google) <mhiramat@kernel.org> wrote: > Ah, this caused a drop. errno can be EAGAIN even if rlen > 0. > I've fixed this and that works. > BTW, I think this virtio-trace would be better to move under > tools/tracing because it is a tracing tool. I'm fine with that, as where it is, I'm very unfamiliar with this tool. It is likely not taking advantage of all the tracing tooling we have. I actually never even used it. -- Steve
On Sun, 20 Aug 2023 22:33:01 -0400 Steven Rostedt <rostedt@goodmis.org> wrote: > On Mon, 21 Aug 2023 11:19:54 +0900 > Masami Hiramatsu (Google) <mhiramat@kernel.org> wrote: > > > Ah, this caused a drop. errno can be EAGAIN even if rlen > 0. > > I've fixed this and that works. > > BTW, I think this virtio-trace would be better to move under > > tools/tracing because it is a tracing tool. > > I'm fine with that, as where it is, I'm very unfamiliar with this tool. > It is likely not taking advantage of all the tracing tooling we > have. I actually never even used it. Actually I also used this after a long time. :P I think this is a kind of simplest splice support test tool. What the tool does; (guest side) per_cpu/cpu*/trace_pipe_raw | (splice) | anon-pipe | (splice) | virtio-serial chardev | = virtqueue === | named-fifo (host-side) So that we can move the traced data (page) from the ring buffer to virtqueue. Then host tool can read the trace data without copying. (The host part needs a copy to read or write to file.) Obviously, this requires some integration work with other tracing tools, because this is just a "fastest trace-data dumper". (I think Yoshihiro worked that integration, but it was not updated) https://lkml.org/lkml/2013/9/12/788 Thank you,
On Sat, 19 Aug 2023 10:42:57 +0900 Masami Hiramatsu (Google) <mhiramat@kernel.org> wrote: > >From 92242480285448360c9390a743ea7b3751bb3e61 Mon Sep 17 00:00:00 2001 > From: "Masami Hiramatsu (Google)" <mhiramat@kernel.org> > Date: Thu, 17 Aug 2023 14:08:40 +0900 > Subject: [PATCH 1/3] tools/virtio-trace: Ignore EAGAIN error on splice() > > splice() can return EAGAIN error instead of returning 0 size read. > In that case, wait a while and try to call splice() again. > > Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org> Bah, When I pulled in Zheng's patch from patchwork, it included the above signed-off-by. I was confused to why it added your SoB. Probably best if we had started this conversation on another thread and not hijack the discussion about this patch. -- Steve > --- > tools/virtio/virtio-trace/trace-agent-rw.c | 7 ++++--- > 1 file changed, 4 insertions(+), 3 deletions(-)
On Mon, 21 Aug 2023 11:17:25 -0400 Steven Rostedt <rostedt@goodmis.org> wrote: > On Sat, 19 Aug 2023 10:42:57 +0900 > Masami Hiramatsu (Google) <mhiramat@kernel.org> wrote: > > > >From 92242480285448360c9390a743ea7b3751bb3e61 Mon Sep 17 00:00:00 2001 > > From: "Masami Hiramatsu (Google)" <mhiramat@kernel.org> > > Date: Thu, 17 Aug 2023 14:08:40 +0900 > > Subject: [PATCH 1/3] tools/virtio-trace: Ignore EAGAIN error on splice() > > > > splice() can return EAGAIN error instead of returning 0 size read. > > In that case, wait a while and try to call splice() again. > > > > Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org> > > Bah, > > When I pulled in Zheng's patch from patchwork, it included the above > signed-off-by. I was confused to why it added your SoB. Oops, I didn't noticed such patchwork behavior. > > Probably best if we had started this conversation on another thread and > not hijack the discussion about this patch. Agreed. Sorry for confusion. > > -- Steve > > > > --- > > tools/virtio/virtio-trace/trace-agent-rw.c | 7 ++++--- > > 1 file changed, 4 insertions(+), 3 deletions(-) >
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index b8870078ef58..c888a0a2c0e2 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -6705,10 +6705,36 @@ tracing_max_lat_write(struct file *filp, const char __user *ubuf, #endif +static int open_pipe_on_cpu(struct trace_array *tr, int cpu) +{ + if (cpu == RING_BUFFER_ALL_CPUS) { + if (cpumask_empty(tr->pipe_cpumask)) { + cpumask_setall(tr->pipe_cpumask); + return 0; + } + } else if (!cpumask_test_cpu(cpu, tr->pipe_cpumask)) { + cpumask_set_cpu(cpu, tr->pipe_cpumask); + return 0; + } + return -EBUSY; +} + +static void close_pipe_on_cpu(struct trace_array *tr, int cpu) +{ + if (cpu == RING_BUFFER_ALL_CPUS) { + WARN_ON(!cpumask_full(tr->pipe_cpumask)); + cpumask_clear(tr->pipe_cpumask); + } else { + WARN_ON(!cpumask_test_cpu(cpu, tr->pipe_cpumask)); + cpumask_clear_cpu(cpu, tr->pipe_cpumask); + } +} + static int tracing_open_pipe(struct inode *inode, struct file *filp) { struct trace_array *tr = inode->i_private; struct trace_iterator *iter; + int cpu; int ret; ret = tracing_check_open_get_tr(tr); @@ -6716,13 +6742,16 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp) return ret; mutex_lock(&trace_types_lock); + cpu = tracing_get_cpu(inode); + ret = open_pipe_on_cpu(tr, cpu); + if (ret) + goto fail_pipe_on_cpu; /* create a buffer to store the information to pass to userspace */ iter = kzalloc(sizeof(*iter), GFP_KERNEL); if (!iter) { ret = -ENOMEM; - __trace_array_put(tr); - goto out; + goto fail_alloc_iter; } trace_seq_init(&iter->seq); @@ -6745,7 +6774,7 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp) iter->tr = tr; iter->array_buffer = &tr->array_buffer; - iter->cpu_file = tracing_get_cpu(inode); + iter->cpu_file = cpu; mutex_init(&iter->mutex); filp->private_data = iter; @@ -6755,12 +6784,15 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp) nonseekable_open(inode, filp); tr->trace_ref++; -out: + mutex_unlock(&trace_types_lock); return ret; fail: kfree(iter); +fail_alloc_iter: + close_pipe_on_cpu(tr, cpu); +fail_pipe_on_cpu: __trace_array_put(tr); mutex_unlock(&trace_types_lock); return ret; @@ -6777,7 +6809,7 @@ static int tracing_release_pipe(struct inode *inode, struct file *file) if (iter->trace->pipe_close) iter->trace->pipe_close(iter); - + close_pipe_on_cpu(tr, iter->cpu_file); mutex_unlock(&trace_types_lock); free_cpumask_var(iter->started); @@ -9441,6 +9473,9 @@ static struct trace_array *trace_array_create(const char *name) if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL)) goto out_free_tr; + if (!alloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL)) + goto out_free_tr; + tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS; cpumask_copy(tr->tracing_cpumask, cpu_all_mask); @@ -9482,6 +9517,7 @@ static struct trace_array *trace_array_create(const char *name) out_free_tr: ftrace_free_ftrace_ops(tr); free_trace_buffers(tr); + free_cpumask_var(tr->pipe_cpumask); free_cpumask_var(tr->tracing_cpumask); kfree(tr->name); kfree(tr); @@ -9584,6 +9620,7 @@ static int __remove_instance(struct trace_array *tr) } kfree(tr->topts); + free_cpumask_var(tr->pipe_cpumask); free_cpumask_var(tr->tracing_cpumask); kfree(tr->name); kfree(tr); @@ -10381,12 +10418,14 @@ __init static int tracer_alloc_buffers(void) if (trace_create_savedcmd() < 0) goto out_free_temp_buffer; + if (!alloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL)) + goto out_free_savedcmd; + /* TODO: make the number of buffers hot pluggable with CPUS */ if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) { MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n"); - goto out_free_savedcmd; + goto out_free_pipe_cpumask; } - if (global_trace.buffer_disabled) tracing_off(); @@ -10439,6 +10478,8 @@ __init static int tracer_alloc_buffers(void) return 0; +out_free_pipe_cpumask: + free_cpumask_var(global_trace.pipe_cpumask); out_free_savedcmd: free_saved_cmdlines_buffer(savedcmd); out_free_temp_buffer: diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index e1edc2197fc8..53ac0f7780c2 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -377,6 +377,8 @@ struct trace_array { struct list_head events; struct trace_event_file *trace_marker_file; cpumask_var_t tracing_cpumask; /* only trace on set CPUs */ + /* one per_cpu trace_pipe can be opened by only one user */ + cpumask_var_t pipe_cpumask; int ref; int trace_ref; #ifdef CONFIG_FUNCTION_TRACER