Message ID | 20191016113316.13415-2-mbenes@suse.cz (mailing list archive) |
---|---|
State | Mainlined |
Commit | 7162431dcf72032835d369c8d7b51311df407938 |
Headers | show |
Series | ftrace: Introduce PERMANENT ftrace_ops flag | expand |
On Wed, 16 Oct 2019 13:33:13 +0200 Miroslav Benes <mbenes@suse.cz> wrote: > Livepatch uses ftrace for redirection to new patched functions. It means > that if ftrace is disabled, all live patched functions are disabled as > well. Toggling global 'ftrace_enabled' sysctl thus affect it directly. > It is not a problem per se, because only administrator can set sysctl > values, but it still may be surprising. > > Introduce PERMANENT ftrace_ops flag to amend this. If the > FTRACE_OPS_FL_PERMANENT is set on any ftrace ops, the tracing cannot be > disabled by disabling ftrace_enabled. Equally, a callback with the flag > set cannot be registered if ftrace_enabled is disabled. > > Signed-off-by: Miroslav Benes <mbenes@suse.cz> > Reviewed-by: Petr Mladek <pmladek@suse.com> > Reviewed-by: Kamalesh Babulal <kamalesh@linux.vnet.ibm.com> > --- > I pulled in this patch as is, but then I realized we have a race. This race has been there before this patch series, and actually, been there since the dawn of ftrace. I realized that the user can modify ftrace_enabled out of lock context. That is, the ftrace_enabled is modified directly from the sysctl code, without taking the ftrace_lock mutex. Which means if the user was starting and stopping function tracing while playing with the ftrace_enabled switch, it could potentially cause an accounting failure. I'm going to apply this patch on top of yours. It reverses the role of how ftrace_enabled is set in the sysctl handler. Instead of having it directly modify ftrace_enabled, I have it modify a new variable called sysctl_ftrace_enabled. I no longer need the last_ftrace_enabled. This way we only need to set or disable ftrace_enabled on a change and if all conditions are met. Thoughts? -- Steve diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 8385cafe4f9f..aa2e2c7cef9e 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -79,6 +79,7 @@ static inline int ftrace_mod_get_kallsym(unsigned int symnum, unsigned long *val #ifdef CONFIG_FUNCTION_TRACER extern int ftrace_enabled; +extern int sysctl_ftrace_enabled; extern int ftrace_enable_sysctl(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, @@ -638,6 +639,7 @@ static inline void tracer_disable(void) { #ifdef CONFIG_FUNCTION_TRACER ftrace_enabled = 0; + sysctl_ftrace_enabled = 0; #endif } @@ -651,6 +653,7 @@ static inline int __ftrace_enabled_save(void) #ifdef CONFIG_FUNCTION_TRACER int saved_ftrace_enabled = ftrace_enabled; ftrace_enabled = 0; + sysctl_ftrace_enabled = 0; return saved_ftrace_enabled; #else return 0; @@ -661,6 +664,7 @@ static inline void __ftrace_enabled_restore(int enabled) { #ifdef CONFIG_FUNCTION_TRACER ftrace_enabled = enabled; + sysctl_ftrace_enabled = enabled; #endif } diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 00fcea236eba..773fdfc6c645 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -648,7 +648,7 @@ static struct ctl_table kern_table[] = { #ifdef CONFIG_FUNCTION_TRACER { .procname = "ftrace_enabled", - .data = &ftrace_enabled, + .data = &sysctl_ftrace_enabled, .maxlen = sizeof(int), .mode = 0644, .proc_handler = ftrace_enable_sysctl, diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index dacb8b50a263..b55c9a4e2b5b 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -88,7 +88,7 @@ struct ftrace_ops ftrace_list_end __read_mostly = { /* ftrace_enabled is a method to turn ftrace on or off */ int ftrace_enabled __read_mostly; -static int last_ftrace_enabled; +int sysctl_ftrace_enabled __read_mostly; /* Current function tracing op */ struct ftrace_ops *function_trace_op __read_mostly = &ftrace_list_end; @@ -6221,7 +6221,7 @@ void __init ftrace_init(void) pr_info("ftrace: allocating %ld entries in %ld pages\n", count, count / ENTRIES_PER_PAGE + 1); - last_ftrace_enabled = ftrace_enabled = 1; + sysctl_ftrace_enabled = ftrace_enabled = 1; ret = ftrace_process_locs(NULL, __start_mcount_loc, @@ -6265,6 +6265,7 @@ struct ftrace_ops global_ops = { static int __init ftrace_nodyn_init(void) { ftrace_enabled = 1; + sysctl_ftrace_enabled = 1; return 0; } core_initcall(ftrace_nodyn_init); @@ -6714,6 +6715,7 @@ void ftrace_kill(void) { ftrace_disabled = 1; ftrace_enabled = 0; + sysctl_ftrace_enabled = 0; ftrace_trace_function = ftrace_stub; } @@ -6796,10 +6798,12 @@ ftrace_enable_sysctl(struct ctl_table *table, int write, ret = proc_dointvec(table, write, buffer, lenp, ppos); - if (ret || !write || (last_ftrace_enabled == !!ftrace_enabled)) + if (ret || !write || (ftrace_enabled == !!sysctl_ftrace_enabled)) goto out; - if (ftrace_enabled) { + if (sysctl_ftrace_enabled) { + + ftrace_enabled = true; /* we are starting ftrace again */ if (rcu_dereference_protected(ftrace_ops_list, @@ -6810,19 +6814,21 @@ ftrace_enable_sysctl(struct ctl_table *table, int write, } else { if (is_permanent_ops_registered()) { - ftrace_enabled = true; ret = -EBUSY; goto out; } + ftrace_enabled = false; + /* stopping ftrace calls (just send to ftrace_stub) */ ftrace_trace_function = ftrace_stub; ftrace_shutdown_sysctl(); } - last_ftrace_enabled = !!ftrace_enabled; out: + sysctl_ftrace_enabled = ftrace_enabled; + mutex_unlock(&ftrace_lock); return ret; }
On Wed, 16 Oct 2019 09:48:53 -0400 Steven Rostedt <rostedt@goodmis.org> wrote: > @@ -6796,10 +6798,12 @@ ftrace_enable_sysctl(struct ctl_table *table, int write, > > ret = proc_dointvec(table, write, buffer, lenp, ppos); As you just stated on IRC, the update to ftrace_enabled gets updated in the above routine. I forgot about this :-/ (Senior moment) I guess there's nothing to worry about here. -- Steve > > - if (ret || !write || (last_ftrace_enabled == !!ftrace_enabled)) > + if (ret || !write || (ftrace_enabled == !!sysctl_ftrace_enabled)) > goto out; >
diff --git a/Documentation/trace/ftrace-uses.rst b/Documentation/trace/ftrace-uses.rst index 1fbc69894eed..740bd0224d35 100644 --- a/Documentation/trace/ftrace-uses.rst +++ b/Documentation/trace/ftrace-uses.rst @@ -170,6 +170,14 @@ FTRACE_OPS_FL_RCU a callback may be executed and RCU synchronization will not protect it. +FTRACE_OPS_FL_PERMANENT + If this is set on any ftrace ops, then the tracing cannot disabled by + writing 0 to the proc sysctl ftrace_enabled. Equally, a callback with + the flag set cannot be registered if ftrace_enabled is 0. + + Livepatch uses it not to lose the function redirection, so the system + stays protected. + Filtering which functions to trace ================================== diff --git a/Documentation/trace/ftrace.rst b/Documentation/trace/ftrace.rst index e3060eedb22d..d2b5657ed33e 100644 --- a/Documentation/trace/ftrace.rst +++ b/Documentation/trace/ftrace.rst @@ -2976,7 +2976,9 @@ Note, the proc sysctl ftrace_enable is a big on/off switch for the function tracer. By default it is enabled (when function tracing is enabled in the kernel). If it is disabled, all function tracing is disabled. This includes not only the function tracers for ftrace, but -also for any other uses (perf, kprobes, stack tracing, profiling, etc). +also for any other uses (perf, kprobes, stack tracing, profiling, etc). It +cannot be disabled if there is a callback with FTRACE_OPS_FL_PERMANENT set +registered. Please disable this with care. diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 8a8cb3c401b2..8385cafe4f9f 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -142,6 +142,8 @@ ftrace_func_t ftrace_ops_get_func(struct ftrace_ops *ops); * PID - Is affected by set_ftrace_pid (allows filtering on those pids) * RCU - Set when the ops can only be called when RCU is watching. * TRACE_ARRAY - The ops->private points to a trace_array descriptor. + * PERMANENT - Set when the ops is permanent and should not be affected by + * ftrace_enabled. */ enum { FTRACE_OPS_FL_ENABLED = 1 << 0, @@ -160,6 +162,7 @@ enum { FTRACE_OPS_FL_PID = 1 << 13, FTRACE_OPS_FL_RCU = 1 << 14, FTRACE_OPS_FL_TRACE_ARRAY = 1 << 15, + FTRACE_OPS_FL_PERMANENT = 1 << 16, }; #ifdef CONFIG_DYNAMIC_FTRACE diff --git a/kernel/livepatch/patch.c b/kernel/livepatch/patch.c index bd43537702bd..b552cf2d85f8 100644 --- a/kernel/livepatch/patch.c +++ b/kernel/livepatch/patch.c @@ -196,7 +196,8 @@ static int klp_patch_func(struct klp_func *func) ops->fops.func = klp_ftrace_handler; ops->fops.flags = FTRACE_OPS_FL_SAVE_REGS | FTRACE_OPS_FL_DYNAMIC | - FTRACE_OPS_FL_IPMODIFY; + FTRACE_OPS_FL_IPMODIFY | + FTRACE_OPS_FL_PERMANENT; list_add(&ops->node, &klp_ops); diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 62a50bf399d6..5c8ad14f313a 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -325,6 +325,8 @@ int __register_ftrace_function(struct ftrace_ops *ops) if (ops->flags & FTRACE_OPS_FL_SAVE_REGS_IF_SUPPORTED) ops->flags |= FTRACE_OPS_FL_SAVE_REGS; #endif + if (!ftrace_enabled && (ops->flags & FTRACE_OPS_FL_PERMANENT)) + return -EBUSY; if (!core_kernel_data((unsigned long)ops)) ops->flags |= FTRACE_OPS_FL_DYNAMIC; @@ -6723,6 +6725,18 @@ int unregister_ftrace_function(struct ftrace_ops *ops) } EXPORT_SYMBOL_GPL(unregister_ftrace_function); +static bool is_permanent_ops_registered(void) +{ + struct ftrace_ops *op; + + do_for_each_ftrace_op(op, ftrace_ops_list) { + if (op->flags & FTRACE_OPS_FL_PERMANENT) + return true; + } while_for_each_ftrace_op(op); + + return false; +} + int ftrace_enable_sysctl(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, @@ -6740,8 +6754,6 @@ ftrace_enable_sysctl(struct ctl_table *table, int write, if (ret || !write || (last_ftrace_enabled == !!ftrace_enabled)) goto out; - last_ftrace_enabled = !!ftrace_enabled; - if (ftrace_enabled) { /* we are starting ftrace again */ @@ -6752,12 +6764,19 @@ ftrace_enable_sysctl(struct ctl_table *table, int write, ftrace_startup_sysctl(); } else { + if (is_permanent_ops_registered()) { + ftrace_enabled = true; + ret = -EBUSY; + goto out; + } + /* stopping ftrace calls (just send to ftrace_stub) */ ftrace_trace_function = ftrace_stub; ftrace_shutdown_sysctl(); } + last_ftrace_enabled = !!ftrace_enabled; out: mutex_unlock(&ftrace_lock); return ret;