Message ID | 1513365176-6744-4-git-send-email-josef@toxicpanda.com (mailing list archive) |
---|---|
State | New, archived |
On 12/15/17 11:12 AM, Josef Bacik wrote:
> +#ifdef CONFIG_BPF_KPROBE_OVERRIDE
> +BPF_CALL_2(bpf_override_return, struct pt_regs *, regs, unsigned long, rc)
> +{
> +        __this_cpu_write(bpf_kprobe_override, 1);
> +        regs_set_return_value(regs, rc);
> +        arch_ftrace_kprobe_override_function(regs);
> +        return 0;
> +}

Since you're doing a respin, can you adopt the change I did to make this
helper fail at load time if that #config is not set, instead of at runtime?

Also, how big is the v9-v10 change? Maybe do it as a separate patch, since
the previous set is already sitting in bpf-next and there are patches on top.
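(For reference, the load-time approach amounts to only exposing the helper's
proto when the config is enabled, so the verifier rejects the program at load.
A minimal sketch using the names from this patch; the exact shape of Alexei's
change is not shown in this thread:)

/* Sketch only: guard the proto rather than the helper body, so a program
 * calling bpf_override_return() fails verification at load time when
 * CONFIG_BPF_KPROBE_OVERRIDE is not set, instead of getting -EINVAL at
 * runtime.
 */
static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func_id)
{
        switch (func_id) {
#ifdef CONFIG_BPF_KPROBE_OVERRIDE
        case BPF_FUNC_override_return:
                return &bpf_override_return_proto;
#endif
        default:
                return tracing_func_proto(func_id);
        }
}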
On 12/15/2017 09:34 PM, Alexei Starovoitov wrote:
[...]
> Also, how big is the v9-v10 change? Maybe do it as a separate patch, since
> the previous set is already sitting in bpf-next and there are patches on top.

+1
On Fri, 15 Dec 2017 14:12:54 -0500
Josef Bacik <josef@toxicpanda.com> wrote:

> From: Josef Bacik <jbacik@fb.com>
>
> Error injection is sloppy and very ad-hoc. BPF could fill this niche
> perfectly with it's kprobe functionality. We could make sure errors are
> only triggered in specific call chains that we care about with very
> specific situations. Accomplish this with the bpf_override_funciton
> helper. This will modify the probe'd callers return value to the
> specified value and set the PC to an override function that simply
> returns, bypassing the originally probed function. This gives us a nice
> clean way to implement systematic error injection for all of our code
> paths.

OK, got it. I think the error_injectable function list should be defined
in kernel/trace/bpf_trace.c, because only bpf calls it and needs to care
about the "safeness".

[...]

> diff --git a/arch/x86/kernel/kprobes/ftrace.c b/arch/x86/kernel/kprobes/ftrace.c
> index 8dc0161cec8f..1ea748d682fd 100644
> --- a/arch/x86/kernel/kprobes/ftrace.c
> +++ b/arch/x86/kernel/kprobes/ftrace.c
> @@ -97,3 +97,17 @@ int arch_prepare_kprobe_ftrace(struct kprobe *p)
>          p->ainsn.boostable = false;
>          return 0;
>  }
> +
> +asmlinkage void override_func(void);
> +asm(
> +        ".type override_func, @function\n"
> +        "override_func:\n"
> +        "        ret\n"
> +        ".size override_func, .-override_func\n"
> +);
> +
> +void arch_ftrace_kprobe_override_function(struct pt_regs *regs)
> +{
> +        regs->ip = (unsigned long)&override_func;
> +}
> +NOKPROBE_SYMBOL(arch_ftrace_kprobe_override_function);

Calling this "override_function" is meaningless. This is a function which
just returns. So I think the combination of just_return_func() and
arch_bpf_override_func_just_return() would be better.

Moreover, this arch/x86/kernel/kprobes/ftrace.c is an architecture-dependent
implementation of kprobes, not bpf.

Hmm, would arch/x86/net/bpf_jit_comp.c be a better place? (Why don't we have
arch/x86/kernel/bpf.c?)

[..]

> diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
> index 492700c5fb4d..91f4b57dab82 100644
> --- a/kernel/trace/trace_kprobe.c
> +++ b/kernel/trace/trace_kprobe.c
> @@ -42,6 +42,7 @@ struct trace_kprobe {
>          (offsetof(struct trace_kprobe, tp.args) +        \
>          (sizeof(struct probe_arg) * (n)))
>
> +DEFINE_PER_CPU(int, bpf_kprobe_override);
>
>  static nokprobe_inline bool trace_kprobe_is_return(struct trace_kprobe *tk)
>  {
> @@ -87,6 +88,27 @@ static nokprobe_inline unsigned long trace_kprobe_nhit(struct trace_kprobe *tk)
>          return nhit;
>  }
>
> +int trace_kprobe_ftrace(struct trace_event_call *call)
> +{
> +        struct trace_kprobe *tk = (struct trace_kprobe *)call->data;
> +        return kprobe_ftrace(&tk->rp.kp);
> +}
> +
> +int trace_kprobe_error_injectable(struct trace_event_call *call)
> +{
> +        struct trace_kprobe *tk = (struct trace_kprobe *)call->data;
> +        unsigned long addr;
> +
> +        if (tk->symbol) {
> +                addr = (unsigned long)
> +                        kallsyms_lookup_name(trace_kprobe_symbol(tk));
> +                addr += tk->rp.kp.offset;

If the tk is already registered, you don't need to look up the address;
you can just use kp.addr. Anyway, kprobe_ftrace() also requires that the
kprobe is already registered.

> +        } else {
> +                addr = (unsigned long)tk->rp.kp.addr;
> +        }
> +        return within_kprobe_error_injection_list(addr);
> +}
> +
>  static int register_kprobe_event(struct trace_kprobe *tk);
>  static int unregister_kprobe_event(struct trace_kprobe *tk);
>
> @@ -1170,7 +1192,7 @@ static int kretprobe_event_define_fields(struct trace_event_call *event_call)
>  #ifdef CONFIG_PERF_EVENTS
>
>  /* Kprobe profile handler */
> -static void
> +static int
>  kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs)
>  {
>          struct trace_event_call *call = &tk->tp.call;
> @@ -1179,12 +1201,29 @@ kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs)
>          int size, __size, dsize;
>          int rctx;
>
> -        if (bpf_prog_array_valid(call) && !trace_call_bpf(call, regs))
> -                return;
> +        if (bpf_prog_array_valid(call)) {
> +                int ret;
> +
> +                ret = trace_call_bpf(call, regs);
> +
> +                /*
> +                 * We need to check and see if we modified the pc of the
> +                 * pt_regs, and if so clear the kprobe and return 1 so that we
> +                 * don't do the instruction skipping. Also reset our state so
> +                 * we are clean the next pass through.
> +                 */
> +                if (__this_cpu_read(bpf_kprobe_override)) {
> +                        __this_cpu_write(bpf_kprobe_override, 0);
> +                        reset_current_kprobe();

OK, I will fix this issue (resetting the kprobe and re-enabling preemption)
by removing jprobe soon. (Currently waiting for the removal of the
{tcp,sctp,dccp}_probe code, which are the only users of jprobe in the kernel.)

Thank you,

> +                        return 1;
> +                }
> +                if (!ret)
> +                        return 0;
> +        }
>
>          head = this_cpu_ptr(call->perf_events);
>          if (hlist_empty(head))
> -                return;
> +                return 0;
>
>          dsize = __get_data_size(&tk->tp, regs);
>          __size = sizeof(*entry) + tk->tp.size + dsize;
> @@ -1193,13 +1232,14 @@ kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs)
>
>          entry = perf_trace_buf_alloc(size, NULL, &rctx);
>          if (!entry)
> -                return;
> +                return 0;
>
>          entry->ip = (unsigned long)tk->rp.kp.addr;
>          memset(&entry[1], 0, dsize);
>          store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize);
>          perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs,
>                                head, NULL);
> +        return 0;
>  }
>  NOKPROBE_SYMBOL(kprobe_perf_func);
>
> @@ -1275,16 +1315,24 @@ static int kprobe_register(struct trace_event_call *event,
>  static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
>  {
>          struct trace_kprobe *tk = container_of(kp, struct trace_kprobe, rp.kp);
> +        int ret = 0;
>
>          raw_cpu_inc(*tk->nhit);
>
>          if (tk->tp.flags & TP_FLAG_TRACE)
>                  kprobe_trace_func(tk, regs);
>  #ifdef CONFIG_PERF_EVENTS
> -        if (tk->tp.flags & TP_FLAG_PROFILE)
> -                kprobe_perf_func(tk, regs);
> +        if (tk->tp.flags & TP_FLAG_PROFILE) {
> +                ret = kprobe_perf_func(tk, regs);
> +                /*
> +                 * The ftrace kprobe handler leaves it up to us to re-enable
> +                 * preemption here before returning if we've modified the ip.
> +                 */
> +                if (ret)
> +                        preempt_enable_no_resched();
> +        }
>  #endif
> -        return 0;        /* We don't tweek kernel, so just return 0 */
> +        return ret;
>  }
>  NOKPROBE_SYMBOL(kprobe_dispatcher);
>
> diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
> index fb66e3eaa192..5e54d748c84c 100644
> --- a/kernel/trace/trace_probe.h
> +++ b/kernel/trace/trace_probe.h
> @@ -252,6 +252,8 @@ struct symbol_cache;
>  unsigned long update_symbol_cache(struct symbol_cache *sc);
>  void free_symbol_cache(struct symbol_cache *sc);
>  struct symbol_cache *alloc_symbol_cache(const char *sym, long offset);
> +int trace_kprobe_ftrace(struct trace_event_call *call);
> +int trace_kprobe_error_injectable(struct trace_event_call *call);
>  #else
>  /* uprobes do not support symbol fetch methods */
>  #define fetch_symbol_u8 NULL
> @@ -277,6 +279,16 @@ alloc_symbol_cache(const char *sym, long offset)
>  {
>          return NULL;
>  }
> +
> +static inline int trace_kprobe_ftrace(struct trace_event_call *call)
> +{
> +        return 0;
> +}
> +
> +static inline int trace_kprobe_error_injectable(struct trace_event_call *call)
> +{
> +        return 0;
> +}
>  #endif /* CONFIG_KPROBE_EVENTS */
>
>  struct probe_arg {
> --
> 2.7.5
>
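(A minimal sketch of the simplification Masami suggests above, assuming the
kprobe is already registered so rp.kp.addr holds the resolved address and the
kallsyms lookup plus offset arithmetic can be dropped:)

int trace_kprobe_error_injectable(struct trace_event_call *call)
{
        struct trace_kprobe *tk = (struct trace_kprobe *)call->data;

        /* kp.addr is resolved once the kprobe is registered, which
         * kprobe_ftrace() requires anyway.
         */
        return within_kprobe_error_injection_list((unsigned long)tk->rp.kp.addr);
}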
On 12/18/2017 10:51 AM, Masami Hiramatsu wrote:
[...]
> Calling this "override_function" is meaningless. This is a function which
> just returns. So I think the combination of just_return_func() and
> arch_bpf_override_func_just_return() would be better.
>
> Moreover, this arch/x86/kernel/kprobes/ftrace.c is an architecture-dependent
> implementation of kprobes, not bpf.

Josef, please work out any necessary cleanups that would still need
to be addressed based on Masami's feedback and send them as follow-up
patches, thanks.

> Hmm, would arch/x86/net/bpf_jit_comp.c be a better place?

(No, it's JIT only and I'd really prefer to keep it that way; mixing
this would result in a huge mess.)
On Mon, 18 Dec 2017 16:09:30 +0100
Daniel Borkmann <daniel@iogearbox.net> wrote:

> On 12/18/2017 10:51 AM, Masami Hiramatsu wrote:
[...]
> > Hmm, would arch/x86/net/bpf_jit_comp.c be a better place?
>
> (No, it's JIT only and I'd really prefer to keep it that way; mixing
> this would result in a huge mess.)

OK, that is the same for kprobes. kernel/kprobes.c and arch/x86/kernel/kprobes/*
are instrumentation code, and kernel/trace/trace_kprobe.c is ftrace's kprobe
user interface, just one implementation of kprobe usage. So please do not mix
them up; it would result in a huge mess for me, too.

Thank you,
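(To make the naming suggestion concrete, a sketch of the renamed arch helpers
Masami proposed earlier in the thread; the body is the same as in the patch
below, only the names change, and where this ultimately lives is the open
question above:)

asmlinkage void just_return_func(void);
asm(
        ".type just_return_func, @function\n"
        "just_return_func:\n"
        "        ret\n"
        ".size just_return_func, .-just_return_func\n"
);

/* Point the probed pt_regs at a function that immediately returns. */
void arch_bpf_override_func_just_return(struct pt_regs *regs)
{
        regs->ip = (unsigned long)&just_return_func;
}
NOKPROBE_SYMBOL(arch_bpf_override_func_just_return);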
diff --git a/arch/Kconfig b/arch/Kconfig index 400b9e1b2f27..d3f4aaf9cb7a 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -196,6 +196,9 @@ config HAVE_OPTPROBES config HAVE_KPROBES_ON_FTRACE bool +config HAVE_KPROBE_OVERRIDE + bool + config HAVE_NMI bool diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 8eed3f94bfc7..04d66e6fa447 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -154,6 +154,7 @@ config X86 select HAVE_KERNEL_XZ select HAVE_KPROBES select HAVE_KPROBES_ON_FTRACE + select HAVE_KPROBE_OVERRIDE select HAVE_KRETPROBES select HAVE_KVM select HAVE_LIVEPATCH if X86_64 diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h index 9f2e3102e0bb..36abb23a7a35 100644 --- a/arch/x86/include/asm/kprobes.h +++ b/arch/x86/include/asm/kprobes.h @@ -67,6 +67,10 @@ extern const int kretprobe_blacklist_size; void arch_remove_kprobe(struct kprobe *p); asmlinkage void kretprobe_trampoline(void); +#ifdef CONFIG_KPROBES_ON_FTRACE +extern void arch_ftrace_kprobe_override_function(struct pt_regs *regs); +#endif + /* Architecture specific copy of original instruction*/ struct arch_specific_insn { /* copy of the original instruction */ diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index 14131dd06b29..6de1fd3d0097 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -109,6 +109,11 @@ static inline unsigned long regs_return_value(struct pt_regs *regs) return regs->ax; } +static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc) +{ + regs->ax = rc; +} + /* * user_mode(regs) determines whether a register set came from user * mode. On x86_32, this is true if V8086 mode was enabled OR if the diff --git a/arch/x86/kernel/kprobes/ftrace.c b/arch/x86/kernel/kprobes/ftrace.c index 8dc0161cec8f..1ea748d682fd 100644 --- a/arch/x86/kernel/kprobes/ftrace.c +++ b/arch/x86/kernel/kprobes/ftrace.c @@ -97,3 +97,17 @@ int arch_prepare_kprobe_ftrace(struct kprobe *p) p->ainsn.boostable = false; return 0; } + +asmlinkage void override_func(void); +asm( + ".type override_func, @function\n" + "override_func:\n" + " ret\n" + ".size override_func, .-override_func\n" +); + +void arch_ftrace_kprobe_override_function(struct pt_regs *regs) +{ + regs->ip = (unsigned long)&override_func; +} +NOKPROBE_SYMBOL(arch_ftrace_kprobe_override_function); diff --git a/include/linux/filter.h b/include/linux/filter.h index 0062302e1285..5feb441d3dd9 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -458,7 +458,8 @@ struct bpf_prog { locked:1, /* Program image locked? */ gpl_compatible:1, /* Is filter GPL compatible? */ cb_access:1, /* Is control block accessed? */ - dst_needed:1; /* Do we need dst entry? */ + dst_needed:1, /* Do we need dst entry? */ + kprobe_override:1; /* Do we override a kprobe? 
*/ enum bpf_prog_type type; /* Type of BPF program */ u32 len; /* Number of filter blocks */ u32 jited_len; /* Size of jited insns in bytes */ diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index af44e7c2d577..5fea451f6e28 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -528,6 +528,7 @@ do { \ struct perf_event; DECLARE_PER_CPU(struct pt_regs, perf_trace_regs); +DECLARE_PER_CPU(int, bpf_kprobe_override); extern int perf_trace_init(struct perf_event *event); extern void perf_trace_destroy(struct perf_event *event); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 80d62e88590c..595bda120cfb 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -677,6 +677,10 @@ union bpf_attr { * @buf: buf to fill * @buf_size: size of the buf * Return : 0 on success or negative error code + * + * int bpf_override_return(pt_regs, rc) + * @pt_regs: pointer to struct pt_regs + * @rc: the return value to set */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -736,7 +740,8 @@ union bpf_attr { FN(xdp_adjust_meta), \ FN(perf_event_read_value), \ FN(perf_prog_read_value), \ - FN(getsockopt), + FN(getsockopt), \ + FN(override_return), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index b9f8686a84cf..fc5a8ab4239a 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1320,6 +1320,9 @@ EVAL4(PROG_NAME_LIST, 416, 448, 480, 512) bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp) { + if (fp->kprobe_override) + return false; + if (!array->owner_prog_type) { /* There's no owner yet where we could check for * compatibility. diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 7afa92e9b409..e807bda7fe29 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -4413,6 +4413,8 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env) prog->dst_needed = 1; if (insn->imm == BPF_FUNC_get_prandom_u32) bpf_user_rnd_init_once(); + if (insn->imm == BPF_FUNC_override_return) + prog->kprobe_override = 1; if (insn->imm == BPF_FUNC_tail_call) { /* If we tail call into other programs, we * cannot make any assumptions since they can diff --git a/kernel/events/core.c b/kernel/events/core.c index 16beab4767e1..6e3862bbe9c2 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -8077,6 +8077,13 @@ static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd) return -EINVAL; } + /* Kprobe override only works for kprobes, not uprobes. */ + if (prog->kprobe_override && + !(event->tp_event->flags & TRACE_EVENT_FL_KPROBE)) { + bpf_prog_put(prog); + return -EINVAL; + } + if (is_tracepoint || is_syscall_tp) { int off = trace_event_get_offsets(event->tp_event); diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index af7dad126c13..3e6fd580fe7f 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -529,6 +529,17 @@ config FUNCTION_PROFILER If in doubt, say N. +config BPF_KPROBE_OVERRIDE + bool "Enable BPF programs to override a kprobed function" + depends on BPF_EVENTS + depends on KPROBES_ON_FTRACE + depends on HAVE_KPROBE_OVERRIDE + depends on DYNAMIC_FTRACE_WITH_REGS + default n + help + Allows BPF to override the execution of a probed function and + set a different return value. This is used for error injection. 
+ config FTRACE_MCOUNT_RECORD def_bool y depends on DYNAMIC_FTRACE diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 27d1f4ffa3de..e4bfdbc5a905 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -13,6 +13,10 @@ #include <linux/filter.h> #include <linux/uaccess.h> #include <linux/ctype.h> +#include <linux/kprobes.h> +#include <asm/kprobes.h> + +#include "trace_probe.h" #include "trace.h" u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); @@ -76,6 +80,29 @@ unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx) } EXPORT_SYMBOL_GPL(trace_call_bpf); +#ifdef CONFIG_BPF_KPROBE_OVERRIDE +BPF_CALL_2(bpf_override_return, struct pt_regs *, regs, unsigned long, rc) +{ + __this_cpu_write(bpf_kprobe_override, 1); + regs_set_return_value(regs, rc); + arch_ftrace_kprobe_override_function(regs); + return 0; +} +#else +BPF_CALL_2(bpf_override_return, struct pt_regs *, regs, unsigned long, rc) +{ + return -EINVAL; +} +#endif + +static const struct bpf_func_proto bpf_override_return_proto = { + .func = bpf_override_return, + .gpl_only = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, +}; + BPF_CALL_3(bpf_probe_read, void *, dst, u32, size, const void *, unsafe_ptr) { int ret; @@ -551,6 +578,8 @@ static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func return &bpf_get_stackid_proto; case BPF_FUNC_perf_event_read_value: return &bpf_perf_event_read_value_proto; + case BPF_FUNC_override_return: + return &bpf_override_return_proto; default: return tracing_func_proto(func_id); } @@ -766,6 +795,15 @@ int perf_event_attach_bpf_prog(struct perf_event *event, struct bpf_prog_array *new_array; int ret = -EEXIST; + /* + * Kprobe override only works for ftrace based kprobes, and only if they + * are on the opt-in list. 
+ */ + if (prog->kprobe_override && + (!trace_kprobe_ftrace(event->tp_event) || + !trace_kprobe_error_injectable(event->tp_event))) + return -EINVAL; + mutex_lock(&bpf_event_mutex); if (event->prog) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 492700c5fb4d..91f4b57dab82 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -42,6 +42,7 @@ struct trace_kprobe { (offsetof(struct trace_kprobe, tp.args) + \ (sizeof(struct probe_arg) * (n))) +DEFINE_PER_CPU(int, bpf_kprobe_override); static nokprobe_inline bool trace_kprobe_is_return(struct trace_kprobe *tk) { @@ -87,6 +88,27 @@ static nokprobe_inline unsigned long trace_kprobe_nhit(struct trace_kprobe *tk) return nhit; } +int trace_kprobe_ftrace(struct trace_event_call *call) +{ + struct trace_kprobe *tk = (struct trace_kprobe *)call->data; + return kprobe_ftrace(&tk->rp.kp); +} + +int trace_kprobe_error_injectable(struct trace_event_call *call) +{ + struct trace_kprobe *tk = (struct trace_kprobe *)call->data; + unsigned long addr; + + if (tk->symbol) { + addr = (unsigned long) + kallsyms_lookup_name(trace_kprobe_symbol(tk)); + addr += tk->rp.kp.offset; + } else { + addr = (unsigned long)tk->rp.kp.addr; + } + return within_kprobe_error_injection_list(addr); +} + static int register_kprobe_event(struct trace_kprobe *tk); static int unregister_kprobe_event(struct trace_kprobe *tk); @@ -1170,7 +1192,7 @@ static int kretprobe_event_define_fields(struct trace_event_call *event_call) #ifdef CONFIG_PERF_EVENTS /* Kprobe profile handler */ -static void +static int kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs) { struct trace_event_call *call = &tk->tp.call; @@ -1179,12 +1201,29 @@ kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs) int size, __size, dsize; int rctx; - if (bpf_prog_array_valid(call) && !trace_call_bpf(call, regs)) - return; + if (bpf_prog_array_valid(call)) { + int ret; + + ret = trace_call_bpf(call, regs); + + /* + * We need to check and see if we modified the pc of the + * pt_regs, and if so clear the kprobe and return 1 so that we + * don't do the instruction skipping. Also reset our state so + * we are clean the next pass through. 
+ */ + if (__this_cpu_read(bpf_kprobe_override)) { + __this_cpu_write(bpf_kprobe_override, 0); + reset_current_kprobe(); + return 1; + } + if (!ret) + return 0; + } head = this_cpu_ptr(call->perf_events); if (hlist_empty(head)) - return; + return 0; dsize = __get_data_size(&tk->tp, regs); __size = sizeof(*entry) + tk->tp.size + dsize; @@ -1193,13 +1232,14 @@ kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs) entry = perf_trace_buf_alloc(size, NULL, &rctx); if (!entry) - return; + return 0; entry->ip = (unsigned long)tk->rp.kp.addr; memset(&entry[1], 0, dsize); store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize); perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs, head, NULL); + return 0; } NOKPROBE_SYMBOL(kprobe_perf_func); @@ -1275,16 +1315,24 @@ static int kprobe_register(struct trace_event_call *event, static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs) { struct trace_kprobe *tk = container_of(kp, struct trace_kprobe, rp.kp); + int ret = 0; raw_cpu_inc(*tk->nhit); if (tk->tp.flags & TP_FLAG_TRACE) kprobe_trace_func(tk, regs); #ifdef CONFIG_PERF_EVENTS - if (tk->tp.flags & TP_FLAG_PROFILE) - kprobe_perf_func(tk, regs); + if (tk->tp.flags & TP_FLAG_PROFILE) { + ret = kprobe_perf_func(tk, regs); + /* + * The ftrace kprobe handler leaves it up to us to re-enable + * preemption here before returning if we've modified the ip. + */ + if (ret) + preempt_enable_no_resched(); + } #endif - return 0; /* We don't tweek kernel, so just return 0 */ + return ret; } NOKPROBE_SYMBOL(kprobe_dispatcher); diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index fb66e3eaa192..5e54d748c84c 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -252,6 +252,8 @@ struct symbol_cache; unsigned long update_symbol_cache(struct symbol_cache *sc); void free_symbol_cache(struct symbol_cache *sc); struct symbol_cache *alloc_symbol_cache(const char *sym, long offset); +int trace_kprobe_ftrace(struct trace_event_call *call); +int trace_kprobe_error_injectable(struct trace_event_call *call); #else /* uprobes do not support symbol fetch methods */ #define fetch_symbol_u8 NULL @@ -277,6 +279,16 @@ alloc_symbol_cache(const char *sym, long offset) { return NULL; } + +static inline int trace_kprobe_ftrace(struct trace_event_call *call) +{ + return 0; +} + +static inline int trace_kprobe_error_injectable(struct trace_event_call *call) +{ + return 0; +} #endif /* CONFIG_KPROBE_EVENTS */ struct probe_arg {
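(For context on how the new helper would be consumed, a hedged sketch of a BPF
program attached to a kprobe on an error-injectable function. The probed symbol
and return value are illustrative only, and bpf_override_return() is declared
by hand here since it is not assumed to be in the samples' bpf_helpers.h yet:)

#include <uapi/linux/bpf.h>
#include <uapi/linux/ptrace.h>
#include "bpf_helpers.h"

/* BPF_FUNC_override_return comes from the uapi header added by this patch. */
static int (*bpf_override_return)(struct pt_regs *regs, unsigned long rc) =
        (void *) BPF_FUNC_override_return;

/* Force the probed function to return -ENOMEM without executing its body.
 * "open_ctree" is just an example target; any symbol on the error-injection
 * opt-in list would do.
 */
SEC("kprobe/open_ctree")
int override_open_ctree(struct pt_regs *ctx)
{
        unsigned long rc = -12; /* -ENOMEM */

        bpf_override_return(ctx, rc);
        return 0;
}

/* bpf_override_return() is gpl_only in the patch above. */
char _license[] SEC("license") = "GPL";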