Message ID | 171723016594.258703.1629777910752596529.stgit@devnote2 (mailing list archive)
---|---
State | Superseded
Delegated to: | Masami Hiramatsu
Series | tracing/probes: Support tracepoint events on modules
On 2024-06-01 04:22, Masami Hiramatsu (Google) wrote: > From: Masami Hiramatsu (Google) <mhiramat@kernel.org> > > Support raw tracepoint event on module by fprobe events. > Since it only uses for_each_kernel_tracepoint() to find a tracepoint, > the tracepoints on modules are not handled. Thus if user specified a > tracepoint on a module, it shows an error. > This adds new for_each_module_tracepoint() API to tracepoint subsystem, > and uses it to find tracepoints on modules. Hi Masami, Why prevent module unload when a fprobe tracepoint is attached to a module ? This changes the kernel's behavior significantly just for the sake of instrumentation. As an alternative, LTTng-modules attach/detach to/from modules with the coming/going notifiers, so the instrumentation gets removed when a module is unloaded rather than preventing its unload by holding a module reference count. I would recommend a similar approach for fprobe. Thanks, Mathieu > > Reported-by: don <zds100@gmail.com> > Closes: https://lore.kernel.org/all/20240530215718.aeec973a1d0bf058d39cb1e3@kernel.org/ > Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org> > --- > Changes in v2: > - Fix build errors with CONFIG_MODULES=y. > --- > kernel/trace/trace_fprobe.c | 46 ++++++++++++++++++++++++++++++++++++------- > 1 file changed, 38 insertions(+), 8 deletions(-) > > diff --git a/kernel/trace/trace_fprobe.c b/kernel/trace/trace_fprobe.c > index 62e6a8f4aae9..1d8a983e1edc 100644 > --- a/kernel/trace/trace_fprobe.c > +++ b/kernel/trace/trace_fprobe.c > @@ -385,6 +385,7 @@ static struct trace_fprobe *alloc_trace_fprobe(const char *group, > const char *event, > const char *symbol, > struct tracepoint *tpoint, > + struct module *mod, > int maxactive, > int nargs, bool is_return) > { > @@ -405,6 +406,7 @@ static struct trace_fprobe *alloc_trace_fprobe(const char *group, > tf->fp.entry_handler = fentry_dispatcher; > > tf->tpoint = tpoint; > + tf->mod = mod; > tf->fp.nr_maxactive = maxactive; > > ret = trace_probe_init(&tf->tp, event, group, false, nargs); > @@ -895,8 +897,23 @@ static struct notifier_block tracepoint_module_nb = { > struct __find_tracepoint_cb_data { > const char *tp_name; > struct tracepoint *tpoint; > + struct module *mod; > }; > > +static void __find_tracepoint_module_cb(struct tracepoint *tp, void *priv) > +{ > + struct __find_tracepoint_cb_data *data = priv; > + > + if (!data->tpoint && !strcmp(data->tp_name, tp->name)) { > + data->tpoint = tp; > + data->mod = __module_text_address((unsigned long)tp->probestub); > + if (!try_module_get(data->mod)) { > + data->tpoint = NULL; > + data->mod = NULL; > + } > + } > +} > + > static void __find_tracepoint_cb(struct tracepoint *tp, void *priv) > { > struct __find_tracepoint_cb_data *data = priv; > @@ -905,14 +922,28 @@ static void __find_tracepoint_cb(struct tracepoint *tp, void *priv) > data->tpoint = tp; > } > > -static struct tracepoint *find_tracepoint(const char *tp_name) > +/* > + * Find a tracepoint from kernel and module. If the tracepoint is in a module, > + * this increments the module refcount to prevent unloading until the > + * trace_fprobe is registered to the list. After registering the trace_fprobe > + * on the trace_fprobe list, the module refcount is decremented because > + * tracepoint_probe_module_cb will handle it. 
> + */ > +static struct tracepoint *find_tracepoint(const char *tp_name, > + struct module **tp_mod) > { > struct __find_tracepoint_cb_data data = { > .tp_name = tp_name, > + .mod = NULL, > }; > > for_each_kernel_tracepoint(__find_tracepoint_cb, &data); > > + if (!data.tpoint && IS_ENABLED(CONFIG_MODULES)) { > + for_each_module_tracepoint(__find_tracepoint_module_cb, &data); > + *tp_mod = data.mod; > + } > + > return data.tpoint; > } > > @@ -996,6 +1027,7 @@ static int __trace_fprobe_create(int argc, const char *argv[]) > char abuf[MAX_BTF_ARGS_LEN]; > char *dbuf = NULL; > bool is_tracepoint = false; > + struct module *tp_mod = NULL; > struct tracepoint *tpoint = NULL; > struct traceprobe_parse_context ctx = { > .flags = TPARG_FL_KERNEL | TPARG_FL_FPROBE, > @@ -1080,7 +1112,7 @@ static int __trace_fprobe_create(int argc, const char *argv[]) > > if (is_tracepoint) { > ctx.flags |= TPARG_FL_TPOINT; > - tpoint = find_tracepoint(symbol); > + tpoint = find_tracepoint(symbol, &tp_mod); > if (!tpoint) { > trace_probe_log_set_index(1); > trace_probe_log_err(0, NO_TRACEPOINT); > @@ -1110,8 +1142,8 @@ static int __trace_fprobe_create(int argc, const char *argv[]) > goto out; > > /* setup a probe */ > - tf = alloc_trace_fprobe(group, event, symbol, tpoint, maxactive, > - argc, is_return); > + tf = alloc_trace_fprobe(group, event, symbol, tpoint, tp_mod, > + maxactive, argc, is_return); > if (IS_ERR(tf)) { > ret = PTR_ERR(tf); > /* This must return -ENOMEM, else there is a bug */ > @@ -1119,10 +1151,6 @@ static int __trace_fprobe_create(int argc, const char *argv[]) > goto out; /* We know tf is not allocated */ > } > > - if (is_tracepoint) > - tf->mod = __module_text_address( > - (unsigned long)tf->tpoint->probestub); > - > /* parse arguments */ > for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) { > trace_probe_log_set_index(i + 2); > @@ -1155,6 +1183,8 @@ static int __trace_fprobe_create(int argc, const char *argv[]) > } > > out: > + if (tp_mod) > + module_put(tp_mod); > traceprobe_finish_parse(&ctx); > trace_probe_log_clear(); > kfree(new_argv); >
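For reference, the coming/going notifier approach Mathieu describes can be sketched roughly as below. This is only an illustration of the kernel's module notifier API, not the LTTng or fprobe implementation; my_attach_probes()/my_detach_probes() are hypothetical placeholders for whatever attach/detach logic a tracer would use.

```c
#include <linux/module.h>
#include <linux/notifier.h>

/* Hypothetical stand-ins for a tracer's attach/detach logic. */
static void my_attach_probes(struct module *mod) { }
static void my_detach_probes(struct module *mod) { }

static int my_module_notify(struct notifier_block *nb,
			    unsigned long action, void *data)
{
	struct module *mod = data;

	switch (action) {
	case MODULE_STATE_COMING:
		/* Module is loading: hook its tracepoints. */
		my_attach_probes(mod);
		break;
	case MODULE_STATE_GOING:
		/*
		 * Module is unloading: remove the instrumentation instead
		 * of pinning the module with a reference count.
		 */
		my_detach_probes(mod);
		break;
	}
	return NOTIFY_OK;
}

static struct notifier_block my_module_nb = {
	.notifier_call = my_module_notify,
};

static int __init my_tracer_init(void)
{
	return register_module_notifier(&my_module_nb);
}
module_init(my_tracer_init);
MODULE_LICENSE("GPL");
```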
On Mon, 3 Jun 2024 15:50:55 -0400
Mathieu Desnoyers <mathieu.desnoyers@efficios.com> wrote:

> Hi Masami,
>
> Why prevent module unload when a fprobe tracepoint is attached to a
> module ? This changes the kernel's behavior significantly just for the
> sake of instrumentation.
>
> As an alternative, LTTng-modules attach/detach to/from modules with the
> coming/going notifiers, so the instrumentation gets removed when a
> module is unloaded rather than preventing its unload by holding a module
> reference count. I would recommend a similar approach for fprobe.

I think one major difference between fprobes and LTTng module attachment,
is that fprobes is an internal mechanism used by other utilities (like
BPF). You could have a program loaded that expects an fprobe to succeed,
and may have undefined behavior if the fprobe suddenly disappears. That
is, we don't know what is depending on that fprobe to simply disable it
on module unload.

-- Steve
On Mon, 3 Jun 2024 15:50:55 -0400 Mathieu Desnoyers <mathieu.desnoyers@efficios.com> wrote: > On 2024-06-01 04:22, Masami Hiramatsu (Google) wrote: > > From: Masami Hiramatsu (Google) <mhiramat@kernel.org> > > > > Support raw tracepoint event on module by fprobe events. > > Since it only uses for_each_kernel_tracepoint() to find a tracepoint, > > the tracepoints on modules are not handled. Thus if user specified a > > tracepoint on a module, it shows an error. > > This adds new for_each_module_tracepoint() API to tracepoint subsystem, > > and uses it to find tracepoints on modules. > > Hi Masami, > > Why prevent module unload when a fprobe tracepoint is attached to a > module ? This changes the kernel's behavior significantly just for the > sake of instrumentation. I don't prevent module unloading all the time, just before registering tracepoint handler (something like booking a ticket :-) ). See the last hunk of this patch, it puts the module before exiting __trace_fprobe_create(). > > As an alternative, LTTng-modules attach/detach to/from modules with the > coming/going notifiers, so the instrumentation gets removed when a > module is unloaded rather than preventing its unload by holding a module > reference count. I would recommend a similar approach for fprobe. Yes, since tracepoint subsystem provides a notifier API to notify the tracepoint is gone, fprobe already uses it to find unloading and unregister the target function. (see __tracepoint_probe_module_cb) Thank you! > > Thanks, > > Mathieu > > > > > > Reported-by: don <zds100@gmail.com> > > Closes: https://lore.kernel.org/all/20240530215718.aeec973a1d0bf058d39cb1e3@kernel.org/ > > Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org> > > --- > > Changes in v2: > > - Fix build errors with CONFIG_MODULES=y. 
> > --- > > kernel/trace/trace_fprobe.c | 46 ++++++++++++++++++++++++++++++++++++------- > > 1 file changed, 38 insertions(+), 8 deletions(-) > > > > diff --git a/kernel/trace/trace_fprobe.c b/kernel/trace/trace_fprobe.c > > index 62e6a8f4aae9..1d8a983e1edc 100644 > > --- a/kernel/trace/trace_fprobe.c > > +++ b/kernel/trace/trace_fprobe.c > > @@ -385,6 +385,7 @@ static struct trace_fprobe *alloc_trace_fprobe(const char *group, > > const char *event, > > const char *symbol, > > struct tracepoint *tpoint, > > + struct module *mod, > > int maxactive, > > int nargs, bool is_return) > > { > > @@ -405,6 +406,7 @@ static struct trace_fprobe *alloc_trace_fprobe(const char *group, > > tf->fp.entry_handler = fentry_dispatcher; > > > > tf->tpoint = tpoint; > > + tf->mod = mod; > > tf->fp.nr_maxactive = maxactive; > > > > ret = trace_probe_init(&tf->tp, event, group, false, nargs); > > @@ -895,8 +897,23 @@ static struct notifier_block tracepoint_module_nb = { > > struct __find_tracepoint_cb_data { > > const char *tp_name; > > struct tracepoint *tpoint; > > + struct module *mod; > > }; > > > > +static void __find_tracepoint_module_cb(struct tracepoint *tp, void *priv) > > +{ > > + struct __find_tracepoint_cb_data *data = priv; > > + > > + if (!data->tpoint && !strcmp(data->tp_name, tp->name)) { > > + data->tpoint = tp; > > + data->mod = __module_text_address((unsigned long)tp->probestub); > > + if (!try_module_get(data->mod)) { > > + data->tpoint = NULL; > > + data->mod = NULL; > > + } > > + } > > +} > > + > > static void __find_tracepoint_cb(struct tracepoint *tp, void *priv) > > { > > struct __find_tracepoint_cb_data *data = priv; > > @@ -905,14 +922,28 @@ static void __find_tracepoint_cb(struct tracepoint *tp, void *priv) > > data->tpoint = tp; > > } > > > > -static struct tracepoint *find_tracepoint(const char *tp_name) > > +/* > > + * Find a tracepoint from kernel and module. If the tracepoint is in a module, > > + * this increments the module refcount to prevent unloading until the > > + * trace_fprobe is registered to the list. After registering the trace_fprobe > > + * on the trace_fprobe list, the module refcount is decremented because > > + * tracepoint_probe_module_cb will handle it. 
> > + */ > > +static struct tracepoint *find_tracepoint(const char *tp_name, > > + struct module **tp_mod) > > { > > struct __find_tracepoint_cb_data data = { > > .tp_name = tp_name, > > + .mod = NULL, > > }; > > > > for_each_kernel_tracepoint(__find_tracepoint_cb, &data); > > > > + if (!data.tpoint && IS_ENABLED(CONFIG_MODULES)) { > > + for_each_module_tracepoint(__find_tracepoint_module_cb, &data); > > + *tp_mod = data.mod; > > + } > > + > > return data.tpoint; > > } > > > > @@ -996,6 +1027,7 @@ static int __trace_fprobe_create(int argc, const char *argv[]) > > char abuf[MAX_BTF_ARGS_LEN]; > > char *dbuf = NULL; > > bool is_tracepoint = false; > > + struct module *tp_mod = NULL; > > struct tracepoint *tpoint = NULL; > > struct traceprobe_parse_context ctx = { > > .flags = TPARG_FL_KERNEL | TPARG_FL_FPROBE, > > @@ -1080,7 +1112,7 @@ static int __trace_fprobe_create(int argc, const char *argv[]) > > > > if (is_tracepoint) { > > ctx.flags |= TPARG_FL_TPOINT; > > - tpoint = find_tracepoint(symbol); > > + tpoint = find_tracepoint(symbol, &tp_mod); > > if (!tpoint) { > > trace_probe_log_set_index(1); > > trace_probe_log_err(0, NO_TRACEPOINT); > > @@ -1110,8 +1142,8 @@ static int __trace_fprobe_create(int argc, const char *argv[]) > > goto out; > > > > /* setup a probe */ > > - tf = alloc_trace_fprobe(group, event, symbol, tpoint, maxactive, > > - argc, is_return); > > + tf = alloc_trace_fprobe(group, event, symbol, tpoint, tp_mod, > > + maxactive, argc, is_return); > > if (IS_ERR(tf)) { > > ret = PTR_ERR(tf); > > /* This must return -ENOMEM, else there is a bug */ > > @@ -1119,10 +1151,6 @@ static int __trace_fprobe_create(int argc, const char *argv[]) > > goto out; /* We know tf is not allocated */ > > } > > > > - if (is_tracepoint) > > - tf->mod = __module_text_address( > > - (unsigned long)tf->tpoint->probestub); > > - > > /* parse arguments */ > > for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) { > > trace_probe_log_set_index(i + 2); > > @@ -1155,6 +1183,8 @@ static int __trace_fprobe_create(int argc, const char *argv[]) > > } > > > > out: > > + if (tp_mod) > > + module_put(tp_mod); > > traceprobe_finish_parse(&ctx); > > trace_probe_log_clear(); > > kfree(new_argv); > > > > -- > Mathieu Desnoyers > EfficiOS Inc. > https://www.efficios.com >
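Masami's "booking a ticket" description boils down to pinning the module only across the registration window. Below is a minimal sketch of that shape, under the assumption that a hypothetical register_my_event() helper puts the event on a list that the module-going notifier later cleans up; the real code does this inside find_tracepoint() and __trace_fprobe_create(), as shown in the patch at the end of this thread.

```c
#include <linux/module.h>
#include <linux/tracepoint.h>

/* Hypothetical: add the event to a list the going-notifier walks. */
static int register_my_event(struct tracepoint *tp, struct module *mod)
{
	return 0;
}

static int attach_to_module_tracepoint(struct tracepoint *tp)
{
	struct module *mod;
	int ret;

	mod = __module_text_address((unsigned long)tp->probestub);
	if (!try_module_get(mod))
		return -ENOENT;	/* module is already on its way out */

	/*
	 * Pin the module only while the event is being registered; once
	 * it is on the list, the module-going notifier takes over cleanup.
	 */
	ret = register_my_event(tp, mod);

	module_put(mod);	/* unloading is allowed again from here on */
	return ret;
}
```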
On 2024-06-03 19:49, Masami Hiramatsu (Google) wrote:
> On Mon, 3 Jun 2024 15:50:55 -0400
> Mathieu Desnoyers <mathieu.desnoyers@efficios.com> wrote:
>
>> On 2024-06-01 04:22, Masami Hiramatsu (Google) wrote:
>>> From: Masami Hiramatsu (Google) <mhiramat@kernel.org>
>>>
>>> Support raw tracepoint event on module by fprobe events.
>>> Since it only uses for_each_kernel_tracepoint() to find a tracepoint,
>>> the tracepoints on modules are not handled. Thus if user specified a
>>> tracepoint on a module, it shows an error.
>>> This adds new for_each_module_tracepoint() API to tracepoint subsystem,
>>> and uses it to find tracepoints on modules.
>>
>> Hi Masami,
>>
>> Why prevent module unload when a fprobe tracepoint is attached to a
>> module ? This changes the kernel's behavior significantly just for the
>> sake of instrumentation.
>
> I don't prevent module unloading all the time, just before registering
> tracepoint handler (something like booking a ticket :-) ).
> See the last hunk of this patch, it puts the module before exiting
> __trace_fprobe_create().

So at least this is transient, but I still have concerns, see below.

>>
>> As an alternative, LTTng-modules attach/detach to/from modules with the
>> coming/going notifiers, so the instrumentation gets removed when a
>> module is unloaded rather than preventing its unload by holding a module
>> reference count. I would recommend a similar approach for fprobe.
>
> Yes, since tracepoint subsystem provides a notifier API to notify the
> tracepoint is gone, fprobe already uses it to find unloading and
> unregister the target function. (see __tracepoint_probe_module_cb)

I see.

It looks like there are a few things we could improve there:

1) With your approach, modules need to be already loaded before
   attaching an fprobe event to them. This effectively prevents
   attaching to any module init code. Is there any way we could allow
   this by implementing a module coming notifier in fprobe as well ?
   This would require that fprobes are kept around in a data structure
   that matches the modules when they are loaded in the coming notifier.

2) Given that the fprobe module going notifier is protected by the
   event_mutex, can we use locking rather than reference counting
   in fprobe attach to guarantee the target module is not reclaimed
   concurrently ? This would remove the transient side-effect of
   holding a module reference count which temporarily prevents module
   unload.

Thanks,

Mathieu
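Point 2) would look roughly like the sketch below. It assumes, as stated above, that the fprobe module-going notifier also runs under event_mutex, so holding that same mutex across the lookup-and-attach window keeps the module's tracepoints from being torn down concurrently, without taking a module reference. find_module_tracepoint() and attach_fprobe_to() are hypothetical names used only for illustration.

```c
#include <linux/mutex.h>
#include <linux/tracepoint.h>
#include <linux/trace_events.h>	/* event_mutex, in the tracing core */

/* Hypothetical lookup and attach helpers. */
static struct tracepoint *find_module_tracepoint(const char *symbol);
static int attach_fprobe_to(struct tracepoint *tp);

static int attach_under_event_mutex(const char *symbol)
{
	struct tracepoint *tpoint;
	int ret;

	mutex_lock(&event_mutex);

	/*
	 * The module-going notifier would also need event_mutex, so it
	 * cannot remove this tracepoint while we hold the lock.
	 */
	tpoint = find_module_tracepoint(symbol);
	ret = tpoint ? attach_fprobe_to(tpoint) : -ENOENT;

	mutex_unlock(&event_mutex);
	return ret;
}
```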
On Tue, 4 Jun 2024 08:49:55 +0900 Masami Hiramatsu (Google) <mhiramat@kernel.org> wrote: > On Mon, 3 Jun 2024 15:50:55 -0400 > Mathieu Desnoyers <mathieu.desnoyers@efficios.com> wrote: > > > On 2024-06-01 04:22, Masami Hiramatsu (Google) wrote: > > > From: Masami Hiramatsu (Google) <mhiramat@kernel.org> > > > > > > Support raw tracepoint event on module by fprobe events. > > > Since it only uses for_each_kernel_tracepoint() to find a tracepoint, > > > the tracepoints on modules are not handled. Thus if user specified a > > > tracepoint on a module, it shows an error. > > > This adds new for_each_module_tracepoint() API to tracepoint subsystem, > > > and uses it to find tracepoints on modules. > > > > Hi Masami, > > > > Why prevent module unload when a fprobe tracepoint is attached to a > > module ? This changes the kernel's behavior significantly just for the > > sake of instrumentation. > > I don't prevent module unloading all the time, just before registering > tracepoint handler (something like booking a ticket :-) ). > See the last hunk of this patch, it puts the module before exiting > __trace_fprobe_create(). > > > > > As an alternative, LTTng-modules attach/detach to/from modules with the > > coming/going notifiers, so the instrumentation gets removed when a > > module is unloaded rather than preventing its unload by holding a module > > reference count. I would recommend a similar approach for fprobe. > > Yes, since tracepoint subsystem provides a notifier API to notify the > tracepoint is gone, fprobe already uses it to find unloading and > unregister the target function. (see __tracepoint_probe_module_cb) > Ah, it only prevents module unloading in __trace_fprobe_create() > +static void __find_tracepoint_module_cb(struct tracepoint *tp, void *priv) > +{ > + struct __find_tracepoint_cb_data *data = priv; > + > + if (!data->tpoint && !strcmp(data->tp_name, tp->name)) { > + data->tpoint = tp; > + data->mod = __module_text_address((unsigned long)tp->probestub); > + if (!try_module_get(data->mod)) { Here it gets the module. Should only happen once, as it sets data->tpoint. > + data->tpoint = NULL; > + data->mod = NULL; > + } > + } > +} > + > static void __find_tracepoint_cb(struct tracepoint *tp, void *priv) > { > struct __find_tracepoint_cb_data *data = priv; > @@ -905,14 +922,28 @@ static void __find_tracepoint_cb(struct tracepoint *tp, void *priv) > data->tpoint = tp; > } > > -static struct tracepoint *find_tracepoint(const char *tp_name) > +/* > + * Find a tracepoint from kernel and module. If the tracepoint is in a module, > + * this increments the module refcount to prevent unloading until the > + * trace_fprobe is registered to the list. After registering the trace_fprobe > + * on the trace_fprobe list, the module refcount is decremented because > + * tracepoint_probe_module_cb will handle it. 
> + */ > +static struct tracepoint *find_tracepoint(const char *tp_name, > + struct module **tp_mod) > { > struct __find_tracepoint_cb_data data = { > .tp_name = tp_name, > + .mod = NULL, > }; > > for_each_kernel_tracepoint(__find_tracepoint_cb, &data); > > + if (!data.tpoint && IS_ENABLED(CONFIG_MODULES)) { > + for_each_module_tracepoint(__find_tracepoint_module_cb, &data); > + *tp_mod = data.mod; > + } > + > return data.tpoint; > } > > @@ -996,6 +1027,7 @@ static int __trace_fprobe_create(int argc, const char *argv[]) > char abuf[MAX_BTF_ARGS_LEN]; > char *dbuf = NULL; > bool is_tracepoint = false; > + struct module *tp_mod = NULL; > struct tracepoint *tpoint = NULL; > struct traceprobe_parse_context ctx = { > .flags = TPARG_FL_KERNEL | TPARG_FL_FPROBE, > @@ -1080,7 +1112,7 @@ static int __trace_fprobe_create(int argc, const char *argv[]) > > if (is_tracepoint) { > ctx.flags |= TPARG_FL_TPOINT; > - tpoint = find_tracepoint(symbol); > + tpoint = find_tracepoint(symbol, &tp_mod); > if (!tpoint) { > trace_probe_log_set_index(1); > trace_probe_log_err(0, NO_TRACEPOINT); > @@ -1110,8 +1142,8 @@ static int __trace_fprobe_create(int argc, const char *argv[]) > goto out; > > /* setup a probe */ > - tf = alloc_trace_fprobe(group, event, symbol, tpoint, maxactive, > - argc, is_return); > + tf = alloc_trace_fprobe(group, event, symbol, tpoint, tp_mod, > + maxactive, argc, is_return); > if (IS_ERR(tf)) { > ret = PTR_ERR(tf); > /* This must return -ENOMEM, else there is a bug */ > @@ -1119,10 +1151,6 @@ static int __trace_fprobe_create(int argc, const char *argv[]) > goto out; /* We know tf is not allocated */ > } > > - if (is_tracepoint) > - tf->mod = __module_text_address( > - (unsigned long)tf->tpoint->probestub); > - > /* parse arguments */ > for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) { > trace_probe_log_set_index(i + 2); > @@ -1155,6 +1183,8 @@ static int __trace_fprobe_create(int argc, const char *argv[]) > } > > out: > + if (tp_mod) > + module_put(tp_mod); And on the way out, it puts it. -- Steve > traceprobe_finish_parse(&ctx); > trace_probe_log_clear(); > kfree(new_argv);
On Tue, 4 Jun 2024 11:02:16 -0400
Mathieu Desnoyers <mathieu.desnoyers@efficios.com> wrote:

> I see.
>
> It looks like there are a few things we could improve there:
>
> 1) With your approach, modules need to be already loaded before
>    attaching an fprobe event to them. This effectively prevents
>    attaching to any module init code. Is there any way we could allow
>    this by implementing a module coming notifier in fprobe as well ?
>    This would require that fprobes are kept around in a data structure
>    that matches the modules when they are loaded in the coming notifier.

The above sounds like a nice enhancement, but not something necessary for
this series.

>
> 2) Given that the fprobe module going notifier is protected by the
>    event_mutex, can we use locking rather than reference counting
>    in fprobe attach to guarantee the target module is not reclaimed
>    concurrently ? This would remove the transient side-effect of
>    holding a module reference count which temporarily prevents module
>    unload.

Why do we care about unloading modules during the transition? Note, module
unload has always been considered a second class citizen, and there's been
talks in the past to even rip it out.

-- Steve
On 2024-06-04 12:34, Steven Rostedt wrote:
> On Tue, 4 Jun 2024 11:02:16 -0400
> Mathieu Desnoyers <mathieu.desnoyers@efficios.com> wrote:
>
>> I see.
>>
>> It looks like there are a few things we could improve there:
>>
>> 1) With your approach, modules need to be already loaded before
>>    attaching an fprobe event to them. This effectively prevents
>>    attaching to any module init code. Is there any way we could allow
>>    this by implementing a module coming notifier in fprobe as well ?
>>    This would require that fprobes are kept around in a data structure
>>    that matches the modules when they are loaded in the coming notifier.
>
> The above sounds like a nice enhancement, but not something necessary for
> this series.

IMHO it is nevertheless relevant to discuss the impact of supporting
this kind of use-case on the ABI presented to userspace, at least to
validate that what is exposed today can incrementally be enhanced
towards that goal.

I'm not saying that it needs to be implemented today, but we should
at least give it some thoughts right now to make sure the ABI is a
good fit.

>>
>> 2) Given that the fprobe module going notifier is protected by the
>>    event_mutex, can we use locking rather than reference counting
>>    in fprobe attach to guarantee the target module is not reclaimed
>>    concurrently ? This would remove the transient side-effect of
>>    holding a module reference count which temporarily prevents module
>>    unload.
>
> Why do we care about unloading modules during the transition? Note, module
> unload has always been considered a second class citizen, and there's been
> talks in the past to even rip it out.

As a general rule I try to ensure tracing has as little impact on the
system behavior so issues that occur without tracing can be reproduced
with instrumentation.

On systems where modules are loaded/unloaded with udev, holding
references on modules can spuriously prevent module unload, which
as a consequence changes the system behavior.

About the relative importance of the various kernel subsystems,
following your reasoning that module unload is considered a
second-class citizen within the kernel, I would argue that tracing
is a third-class citizen and should not needlessly modify the
behavior of classes above it.

Thanks,

Mathieu
Hi,

Sorry I missed this thread. Thanks for your comments.

On Tue, 4 Jun 2024 14:03:05 -0400
Mathieu Desnoyers <mathieu.desnoyers@efficios.com> wrote:

> On 2024-06-04 12:34, Steven Rostedt wrote:
> > On Tue, 4 Jun 2024 11:02:16 -0400
> > Mathieu Desnoyers <mathieu.desnoyers@efficios.com> wrote:
> >
> >> I see.
> >>
> >> It looks like there are a few things we could improve there:
> >>
> >> 1) With your approach, modules need to be already loaded before
> >>    attaching an fprobe event to them. This effectively prevents
> >>    attaching to any module init code. Is there any way we could allow
> >>    this by implementing a module coming notifier in fprobe as well ?
> >>    This would require that fprobes are kept around in a data structure
> >>    that matches the modules when they are loaded in the coming notifier.
> >
> > The above sounds like a nice enhancement, but not something necessary for
> > this series.
>
> IMHO it is nevertheless relevant to discuss the impact of supporting
> this kind of use-case on the ABI presented to userspace, at least to
> validate that what is exposed today can incrementally be enhanced
> towards that goal.
>
> I'm not saying that it needs to be implemented today, but we should
> at least give it some thoughts right now to make sure the ABI is a
> good fit.

OK, let me try to update to handle module loading. I also need to add
a module which has a test tracepoint in init function.

>
> >>
> >> 2) Given that the fprobe module going notifier is protected by the
> >>    event_mutex, can we use locking rather than reference counting
> >>    in fprobe attach to guarantee the target module is not reclaimed
> >>    concurrently ? This would remove the transient side-effect of
> >>    holding a module reference count which temporarily prevents module
> >>    unload.

See trace_kprobe_module_callback()@kernel/trace/trace_kprobe.c.
I think we can filter the MODULE_STATE_COMING flag before locking
event_mutex. We anyway don't check the module is going because it
would be a waste to disarm the raw tracepoint events from the going
module.

Thank you,

> >
> > Why do we care about unloading modules during the transition? Note, module
> > unload has always been considered a second class citizen, and there's been
> > talks in the past to even rip it out.
>
> As a general rule I try to ensure tracing has as little impact on the
> system behavior so issues that occur without tracing can be reproduced
> with instrumentation.
>
> On systems where modules are loaded/unloaded with udev, holding
> references on modules can spuriously prevent module unload, which
> as a consequence changes the system behavior.
>
> About the relative importance of the various kernel subsystems,
> following your reasoning that module unload is considered a
> second-class citizen within the kernel, I would argue that tracing
> is a third-class citizen and should not needlessly modify the
> behavior of classes above it.
>
> Thanks,
>
> Mathieu
>
> --
> Mathieu Desnoyers
> EfficiOS Inc.
> https://www.efficios.com
>
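The shape Masami points at in trace_kprobe_module_callback() is roughly the following: uninteresting notifications are filtered out before event_mutex is taken, and only MODULE_STATE_COMING is acted on. This is a sketch modeled on that callback, with enable_events_for_module() as a hypothetical stand-in for the actual re-arming logic.

```c
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/notifier.h>
#include <linux/trace_events.h>	/* event_mutex */

/* Hypothetical: arm the events that name tracepoints in this module. */
static void enable_events_for_module(struct module *mod) { }

static int my_fprobe_module_callback(struct notifier_block *nb,
				     unsigned long val, void *data)
{
	struct module *mod = data;

	/*
	 * Only care about modules that have just been loaded; GOING is
	 * already handled by the tracepoint module-going notifier.
	 */
	if (val != MODULE_STATE_COMING)
		return NOTIFY_DONE;

	mutex_lock(&event_mutex);
	enable_events_for_module(mod);
	mutex_unlock(&event_mutex);

	return NOTIFY_DONE;
}
```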
diff --git a/kernel/trace/trace_fprobe.c b/kernel/trace/trace_fprobe.c
index 62e6a8f4aae9..1d8a983e1edc 100644
--- a/kernel/trace/trace_fprobe.c
+++ b/kernel/trace/trace_fprobe.c
@@ -385,6 +385,7 @@ static struct trace_fprobe *alloc_trace_fprobe(const char *group,
 					       const char *event,
 					       const char *symbol,
 					       struct tracepoint *tpoint,
+					       struct module *mod,
 					       int maxactive,
 					       int nargs, bool is_return)
 {
@@ -405,6 +406,7 @@ static struct trace_fprobe *alloc_trace_fprobe(const char *group,
 		tf->fp.entry_handler = fentry_dispatcher;
 
 	tf->tpoint = tpoint;
+	tf->mod = mod;
 	tf->fp.nr_maxactive = maxactive;
 
 	ret = trace_probe_init(&tf->tp, event, group, false, nargs);
@@ -895,8 +897,23 @@ static struct notifier_block tracepoint_module_nb = {
 struct __find_tracepoint_cb_data {
 	const char *tp_name;
 	struct tracepoint *tpoint;
+	struct module *mod;
 };
 
+static void __find_tracepoint_module_cb(struct tracepoint *tp, void *priv)
+{
+	struct __find_tracepoint_cb_data *data = priv;
+
+	if (!data->tpoint && !strcmp(data->tp_name, tp->name)) {
+		data->tpoint = tp;
+		data->mod = __module_text_address((unsigned long)tp->probestub);
+		if (!try_module_get(data->mod)) {
+			data->tpoint = NULL;
+			data->mod = NULL;
+		}
+	}
+}
+
 static void __find_tracepoint_cb(struct tracepoint *tp, void *priv)
 {
 	struct __find_tracepoint_cb_data *data = priv;
@@ -905,14 +922,28 @@ static void __find_tracepoint_cb(struct tracepoint *tp, void *priv)
 		data->tpoint = tp;
 }
 
-static struct tracepoint *find_tracepoint(const char *tp_name)
+/*
+ * Find a tracepoint from kernel and module. If the tracepoint is in a module,
+ * this increments the module refcount to prevent unloading until the
+ * trace_fprobe is registered to the list. After registering the trace_fprobe
+ * on the trace_fprobe list, the module refcount is decremented because
+ * tracepoint_probe_module_cb will handle it.
+ */
+static struct tracepoint *find_tracepoint(const char *tp_name,
					   struct module **tp_mod)
 {
 	struct __find_tracepoint_cb_data data = {
 		.tp_name = tp_name,
+		.mod = NULL,
 	};
 
 	for_each_kernel_tracepoint(__find_tracepoint_cb, &data);
 
+	if (!data.tpoint && IS_ENABLED(CONFIG_MODULES)) {
+		for_each_module_tracepoint(__find_tracepoint_module_cb, &data);
+		*tp_mod = data.mod;
+	}
+
 	return data.tpoint;
 }
 
@@ -996,6 +1027,7 @@ static int __trace_fprobe_create(int argc, const char *argv[])
 	char abuf[MAX_BTF_ARGS_LEN];
 	char *dbuf = NULL;
 	bool is_tracepoint = false;
+	struct module *tp_mod = NULL;
 	struct tracepoint *tpoint = NULL;
 	struct traceprobe_parse_context ctx = {
 		.flags = TPARG_FL_KERNEL | TPARG_FL_FPROBE,
@@ -1080,7 +1112,7 @@ static int __trace_fprobe_create(int argc, const char *argv[])
 
 	if (is_tracepoint) {
 		ctx.flags |= TPARG_FL_TPOINT;
-		tpoint = find_tracepoint(symbol);
+		tpoint = find_tracepoint(symbol, &tp_mod);
 		if (!tpoint) {
 			trace_probe_log_set_index(1);
 			trace_probe_log_err(0, NO_TRACEPOINT);
@@ -1110,8 +1142,8 @@ static int __trace_fprobe_create(int argc, const char *argv[])
 		goto out;
 
 	/* setup a probe */
-	tf = alloc_trace_fprobe(group, event, symbol, tpoint, maxactive,
-				argc, is_return);
+	tf = alloc_trace_fprobe(group, event, symbol, tpoint, tp_mod,
+				maxactive, argc, is_return);
 	if (IS_ERR(tf)) {
 		ret = PTR_ERR(tf);
 		/* This must return -ENOMEM, else there is a bug */
@@ -1119,10 +1151,6 @@ static int __trace_fprobe_create(int argc, const char *argv[])
 		goto out;	/* We know tf is not allocated */
 	}
 
-	if (is_tracepoint)
-		tf->mod = __module_text_address(
-					(unsigned long)tf->tpoint->probestub);
-
 	/* parse arguments */
 	for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) {
 		trace_probe_log_set_index(i + 2);
@@ -1155,6 +1183,8 @@ static int __trace_fprobe_create(int argc, const char *argv[])
 	}
 
 out:
+	if (tp_mod)
+		module_put(tp_mod);
 	traceprobe_finish_parse(&ctx);
 	trace_probe_log_clear();
 	kfree(new_argv);
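One detail worth noting in the patch above: find_tracepoint() guards the module walk with IS_ENABLED(CONFIG_MODULES), so the branch is discarded at compile time on !CONFIG_MODULES builds, but for_each_module_tracepoint() still has to be at least declared (or stubbed) for such configs. A plausible header stub, assuming the declaration lives next to the rest of the tracepoint iteration API as the series suggests, could look like this:

```c
#ifdef CONFIG_MODULES
void for_each_module_tracepoint(void (*fct)(struct tracepoint *tp, void *priv),
				void *priv);
#else
static inline void
for_each_module_tracepoint(void (*fct)(struct tracepoint *tp, void *priv),
			   void *priv)
{
	/* No loadable modules, nothing to iterate over. */
}
#endif
```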