
[v10,3/5] bpf: add a bpf_override_function helper

Message ID 1513365176-6744-4-git-send-email-josef@toxicpanda.com (mailing list archive)
State New, archived

Commit Message

Josef Bacik Dec. 15, 2017, 7:12 p.m. UTC
From: Josef Bacik <jbacik@fb.com>

Error injection is sloppy and very ad-hoc.  BPF could fill this niche
perfectly with its kprobe functionality.  We can make sure errors are
only triggered in the specific call chains and situations that we care
about.  Accomplish this with the bpf_override_function helper.  It
modifies the probed function's return value to the specified value and
sets the PC to an override function that simply returns, bypassing the
originally probed function.  This gives us a nice clean way to
implement systematic error injection for all of our code paths.

Acked-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Josef Bacik <jbacik@fb.com>
---
 arch/Kconfig                     |  3 ++
 arch/x86/Kconfig                 |  1 +
 arch/x86/include/asm/kprobes.h   |  4 +++
 arch/x86/include/asm/ptrace.h    |  5 ++++
 arch/x86/kernel/kprobes/ftrace.c | 14 +++++++++
 include/linux/filter.h           |  3 +-
 include/linux/trace_events.h     |  1 +
 include/uapi/linux/bpf.h         |  7 ++++-
 kernel/bpf/core.c                |  3 ++
 kernel/bpf/verifier.c            |  2 ++
 kernel/events/core.c             |  7 +++++
 kernel/trace/Kconfig             | 11 +++++++
 kernel/trace/bpf_trace.c         | 38 ++++++++++++++++++++++++
 kernel/trace/trace_kprobe.c      | 64 +++++++++++++++++++++++++++++++++++-----
 kernel/trace/trace_probe.h       | 12 ++++++++
 15 files changed, 165 insertions(+), 10 deletions(-)
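[Editor's note: for context, here is a minimal sketch (not part of the
patch) of a BPF program that would use the new helper to make a probed
function fail with -ENOMEM.  The attach point and program name are
made up for illustration, in the style of samples/bpf:]

#include <linux/ptrace.h>
#include <uapi/linux/bpf.h>

#define SEC(name) __attribute__((section(name), used))

/* Helper stub; the id comes from the BPF_FUNC_override_return entry
 * this patch adds to the UAPI helper list.
 */
static int (*bpf_override_return)(struct pt_regs *regs, unsigned long rc) =
	(void *) BPF_FUNC_override_return;

SEC("kprobe/open_ctree")	/* hypothetical error-injectable target */
int inject_enomem(struct pt_regs *ctx)
{
	/* The probed function never runs; its caller sees -ENOMEM. */
	bpf_override_return(ctx, (unsigned long)-12 /* -ENOMEM */);
	return 0;
}

char _license[] SEC("license") = "GPL";	/* the helper is gpl_only */

[Attaching this to a kprobe event only succeeds if the kernel was built
with CONFIG_BPF_KPROBE_OVERRIDE and the target is on the error-injection
opt-in list; otherwise the attach fails with -EINVAL, per the checks in
perf_event_attach_bpf_prog() below.]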

Comments

Alexei Starovoitov Dec. 15, 2017, 8:34 p.m. UTC | #1
On 12/15/17 11:12 AM, Josef Bacik wrote:
> +#ifdef CONFIG_BPF_KPROBE_OVERRIDE
> +BPF_CALL_2(bpf_override_return, struct pt_regs *, regs, unsigned long, rc)
> +{
> +	__this_cpu_write(bpf_kprobe_override, 1);
> +	regs_set_return_value(regs, rc);
> +	arch_ftrace_kprobe_override_function(regs);
> +	return 0;
> +}

Since you're doing a respin, can you adopt the change I did to make
this helper fail at load time, instead of at runtime, when that config
is not set?
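
[Editor's note: a minimal sketch of that load-time rejection, assuming
it is done by only exposing the helper proto when the config is set;
the actual follow-up patch may differ:]

/* kernel/trace/bpf_trace.c: with the #ifdef moved here, a program
 * calling bpf_override_return on a kernel without
 * CONFIG_BPF_KPROBE_OVERRIDE is rejected by the verifier at load time
 * ("unknown func") instead of the helper returning -EINVAL at run time.
 */
static const struct bpf_func_proto *
kprobe_prog_func_proto(enum bpf_func_id func_id)
{
	switch (func_id) {
	/* other helper cases elided */
#ifdef CONFIG_BPF_KPROBE_OVERRIDE
	case BPF_FUNC_override_return:
		return &bpf_override_return_proto;
#endif
	default:
		return tracing_func_proto(func_id);
	}
}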

Also, how big is the v9-v10 change?  Maybe do it as a separate patch,
since the previous set is already sitting in bpf-next and there are
patches on top?

Daniel Borkmann Dec. 15, 2017, 8:53 p.m. UTC | #2
On 12/15/2017 09:34 PM, Alexei Starovoitov wrote:
[...]
> Also how big is the v9-v10 change ?
> May be do it as separate patch, since previous set already sitting
> in bpf-next and there are patches on top?

+1
Masami Hiramatsu (Google) Dec. 18, 2017, 9:51 a.m. UTC | #3
On Fri, 15 Dec 2017 14:12:54 -0500
Josef Bacik <josef@toxicpanda.com> wrote:

> From: Josef Bacik <jbacik@fb.com>
> 
> Error injection is sloppy and very ad-hoc.  BPF could fill this niche
> perfectly with its kprobe functionality.  We can make sure errors are
> only triggered in the specific call chains and situations that we care
> about.  Accomplish this with the bpf_override_function helper.  It
> modifies the probed function's return value to the specified value and
> sets the PC to an override function that simply returns, bypassing the
> originally probed function.  This gives us a nice clean way to
> implement systematic error injection for all of our code paths.

OK, got it. I think the error_injectable function list should be
defined in kernel/trace/bpf_trace.c, because only BPF calls it and
needs to care about its safety.

[...]
> diff --git a/arch/x86/kernel/kprobes/ftrace.c b/arch/x86/kernel/kprobes/ftrace.c
> index 8dc0161cec8f..1ea748d682fd 100644
> --- a/arch/x86/kernel/kprobes/ftrace.c
> +++ b/arch/x86/kernel/kprobes/ftrace.c
> @@ -97,3 +97,17 @@ int arch_prepare_kprobe_ftrace(struct kprobe *p)
>  	p->ainsn.boostable = false;
>  	return 0;
>  }
> +
> +asmlinkage void override_func(void);
> +asm(
> +	".type override_func, @function\n"
> +	"override_func:\n"
> +	"	ret\n"
> +	".size override_func, .-override_func\n"
> +);
> +
> +void arch_ftrace_kprobe_override_function(struct pt_regs *regs)
> +{
> +	regs->ip = (unsigned long)&override_func;
> +}
> +NOKPROBE_SYMBOL(arch_ftrace_kprobe_override_function);

Calling this 'override_function' is meaningless; it is a function that
just returns. So I think a combination of just_return_func() and
arch_bpf_override_func_just_return() would be better.

Moreover, arch/x86/kernel/kprobes/ftrace.c is an architecture-dependent
implementation of kprobes, not of BPF.

Hmm, would arch/x86/net/bpf_jit_comp.c be a better place?
(Why don't we have an arch/x86/kernel/bpf.c?)
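
[Editor's note: for illustration, the rename being suggested would look
something like this; the names are Masami's proposals, not code from
the patch:]

asmlinkage void just_return_func(void);
asm(
	".type just_return_func, @function\n"
	"just_return_func:\n"
	"	ret\n"
	".size just_return_func, .-just_return_func\n"
);

/* The name now says what it does: make the probed function just
 * return, with whatever return value was already placed in regs.
 */
void arch_bpf_override_func_just_return(struct pt_regs *regs)
{
	regs->ip = (unsigned long)&just_return_func;
}
NOKPROBE_SYMBOL(arch_bpf_override_func_just_return);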

[...]
> diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
> index 492700c5fb4d..91f4b57dab82 100644
> --- a/kernel/trace/trace_kprobe.c
> +++ b/kernel/trace/trace_kprobe.c
> @@ -42,6 +42,7 @@ struct trace_kprobe {
>  	(offsetof(struct trace_kprobe, tp.args) +	\
>  	(sizeof(struct probe_arg) * (n)))
>  
> +DEFINE_PER_CPU(int, bpf_kprobe_override);
>  
>  static nokprobe_inline bool trace_kprobe_is_return(struct trace_kprobe *tk)
>  {
> @@ -87,6 +88,27 @@ static nokprobe_inline unsigned long trace_kprobe_nhit(struct trace_kprobe *tk)
>  	return nhit;
>  }
>  
> +int trace_kprobe_ftrace(struct trace_event_call *call)
> +{
> +	struct trace_kprobe *tk = (struct trace_kprobe *)call->data;
> +	return kprobe_ftrace(&tk->rp.kp);
> +}
> +
> +int trace_kprobe_error_injectable(struct trace_event_call *call)
> +{
> +	struct trace_kprobe *tk = (struct trace_kprobe *)call->data;
> +	unsigned long addr;
> +
> +	if (tk->symbol) {
> +		addr = (unsigned long)
> +			kallsyms_lookup_name(trace_kprobe_symbol(tk));
> +		addr += tk->rp.kp.offset;

If the tk is already registered, you don't need to look up the address;
you can use kp.addr. Anyway, kprobe_ftrace() also requires that the
kprobe be already registered.
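
[Editor's note: a sketch of that simplification, assuming the function
can only be called once the kprobe has been registered and kp.addr
resolved:]

int trace_kprobe_error_injectable(struct trace_event_call *call)
{
	struct trace_kprobe *tk = (struct trace_kprobe *)call->data;

	/* register_kprobe() already resolved symbol + offset into
	 * kp.addr, so no kallsyms lookup is needed here.
	 */
	return within_kprobe_error_injection_list(
			(unsigned long)tk->rp.kp.addr);
}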

> +	} else {
> +		addr = (unsigned long)tk->rp.kp.addr;
> +	}
> +	return within_kprobe_error_injection_list(addr);
> +}
> +
>  static int register_kprobe_event(struct trace_kprobe *tk);
>  static int unregister_kprobe_event(struct trace_kprobe *tk);
>  
> @@ -1170,7 +1192,7 @@ static int kretprobe_event_define_fields(struct trace_event_call *event_call)
>  #ifdef CONFIG_PERF_EVENTS
>  
>  /* Kprobe profile handler */
> -static void
> +static int
>  kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs)
>  {
>  	struct trace_event_call *call = &tk->tp.call;
> @@ -1179,12 +1201,29 @@ kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs)
>  	int size, __size, dsize;
>  	int rctx;
>  
> -	if (bpf_prog_array_valid(call) && !trace_call_bpf(call, regs))
> -		return;
> +	if (bpf_prog_array_valid(call)) {
> +		int ret;
> +
> +		ret = trace_call_bpf(call, regs);
> +
> +		/*
> +		 * We need to check and see if we modified the pc of the
> +		 * pt_regs, and if so clear the kprobe and return 1 so that we
> +		 * don't do the instruction skipping.  Also reset our state so
> +		 * we are clean the next pass through.
> +		 */
> +		if (__this_cpu_read(bpf_kprobe_override)) {
> +			__this_cpu_write(bpf_kprobe_override, 0);
> +			reset_current_kprobe();

OK, I will fix this issue (reset kprobe and preempt-enable) by removing
jprobe soon.
(I am currently waiting for the removal of the {tcp,sctp,dccp}_probe
 code, which contains the only users of jprobe in the kernel.)

Thank you,


Daniel Borkmann Dec. 18, 2017, 3:09 p.m. UTC | #4
On 12/18/2017 10:51 AM, Masami Hiramatsu wrote:
> [...]
> Calling this 'override_function' is meaningless; it is a function that
> just returns. So I think a combination of just_return_func() and
> arch_bpf_override_func_just_return() would be better.
> 
> Moreover, arch/x86/kernel/kprobes/ftrace.c is an architecture-dependent
> implementation of kprobes, not of BPF.

Josef, please work out any necessary cleanups that would still need
to be addressed based on Masami's feedback and send them as follow-up
patches, thanks.

> Hmm, would arch/x86/net/bpf_jit_comp.c be a better place?

(No, it's JIT only and I'd really prefer to keep it that way; mixing
 this would result in a huge mess.)
Masami Hiramatsu (Google) Dec. 19, 2017, 6:38 a.m. UTC | #5
On Mon, 18 Dec 2017 16:09:30 +0100
Daniel Borkmann <daniel@iogearbox.net> wrote:

> On 12/18/2017 10:51 AM, Masami Hiramatsu wrote:
> > [...]
> 
> Josef, please work out any necessary cleanups that would still need
> to be addressed based on Masami's feedback and send them as follow-up
> patches, thanks.
> 
> > Hmm, would arch/x86/net/bpf_jit_comp.c be a better place?
> 
> (No, it's JIT only and I'd really prefer to keep it that way; mixing
>  this would result in a huge mess.)

OK, the same applies to kprobes: kernel/kprobes.c and
arch/x86/kernel/kprobes/* are for the instrumentation code, and
kernel/trace/trace_kprobe.c is ftrace's kprobe user interface, just one
implementation of kprobe usage. So please do not mix them up; that
would result in a huge mess for me.

Thank you,

Patch

diff --git a/arch/Kconfig b/arch/Kconfig
index 400b9e1b2f27..d3f4aaf9cb7a 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -196,6 +196,9 @@  config HAVE_OPTPROBES
 config HAVE_KPROBES_ON_FTRACE
 	bool
 
+config HAVE_KPROBE_OVERRIDE
+	bool
+
 config HAVE_NMI
 	bool
 
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 8eed3f94bfc7..04d66e6fa447 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -154,6 +154,7 @@  config X86
 	select HAVE_KERNEL_XZ
 	select HAVE_KPROBES
 	select HAVE_KPROBES_ON_FTRACE
+	select HAVE_KPROBE_OVERRIDE
 	select HAVE_KRETPROBES
 	select HAVE_KVM
 	select HAVE_LIVEPATCH			if X86_64
diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h
index 9f2e3102e0bb..36abb23a7a35 100644
--- a/arch/x86/include/asm/kprobes.h
+++ b/arch/x86/include/asm/kprobes.h
@@ -67,6 +67,10 @@  extern const int kretprobe_blacklist_size;
 void arch_remove_kprobe(struct kprobe *p);
 asmlinkage void kretprobe_trampoline(void);
 
+#ifdef CONFIG_KPROBES_ON_FTRACE
+extern void arch_ftrace_kprobe_override_function(struct pt_regs *regs);
+#endif
+
 /* Architecture specific copy of original instruction*/
 struct arch_specific_insn {
 	/* copy of the original instruction */
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 14131dd06b29..6de1fd3d0097 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -109,6 +109,11 @@  static inline unsigned long regs_return_value(struct pt_regs *regs)
 	return regs->ax;
 }
 
+static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc)
+{
+	regs->ax = rc;
+}
+
 /*
  * user_mode(regs) determines whether a register set came from user
  * mode.  On x86_32, this is true if V8086 mode was enabled OR if the
diff --git a/arch/x86/kernel/kprobes/ftrace.c b/arch/x86/kernel/kprobes/ftrace.c
index 8dc0161cec8f..1ea748d682fd 100644
--- a/arch/x86/kernel/kprobes/ftrace.c
+++ b/arch/x86/kernel/kprobes/ftrace.c
@@ -97,3 +97,17 @@  int arch_prepare_kprobe_ftrace(struct kprobe *p)
 	p->ainsn.boostable = false;
 	return 0;
 }
+
+asmlinkage void override_func(void);
+asm(
+	".type override_func, @function\n"
+	"override_func:\n"
+	"	ret\n"
+	".size override_func, .-override_func\n"
+);
+
+void arch_ftrace_kprobe_override_function(struct pt_regs *regs)
+{
+	regs->ip = (unsigned long)&override_func;
+}
+NOKPROBE_SYMBOL(arch_ftrace_kprobe_override_function);
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 0062302e1285..5feb441d3dd9 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -458,7 +458,8 @@  struct bpf_prog {
 				locked:1,	/* Program image locked? */
 				gpl_compatible:1, /* Is filter GPL compatible? */
 				cb_access:1,	/* Is control block accessed? */
-				dst_needed:1;	/* Do we need dst entry? */
+				dst_needed:1,	/* Do we need dst entry? */
+				kprobe_override:1; /* Do we override a kprobe? */
 	enum bpf_prog_type	type;		/* Type of BPF program */
 	u32			len;		/* Number of filter blocks */
 	u32			jited_len;	/* Size of jited insns in bytes */
diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index af44e7c2d577..5fea451f6e28 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -528,6 +528,7 @@  do {									\
 struct perf_event;
 
 DECLARE_PER_CPU(struct pt_regs, perf_trace_regs);
+DECLARE_PER_CPU(int, bpf_kprobe_override);
 
 extern int  perf_trace_init(struct perf_event *event);
 extern void perf_trace_destroy(struct perf_event *event);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 80d62e88590c..595bda120cfb 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -677,6 +677,10 @@  union bpf_attr {
  *     @buf: buf to fill
  *     @buf_size: size of the buf
  *     Return : 0 on success or negative error code
+ *
+ * int bpf_override_return(pt_regs, rc)
+ *	@pt_regs: pointer to struct pt_regs
+ *	@rc: the return value to set
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -736,7 +740,8 @@  union bpf_attr {
 	FN(xdp_adjust_meta),		\
 	FN(perf_event_read_value),	\
 	FN(perf_prog_read_value),	\
-	FN(getsockopt),
+	FN(getsockopt),			\
+	FN(override_return),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index b9f8686a84cf..fc5a8ab4239a 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1320,6 +1320,9 @@  EVAL4(PROG_NAME_LIST, 416, 448, 480, 512)
 bool bpf_prog_array_compatible(struct bpf_array *array,
 			       const struct bpf_prog *fp)
 {
+	if (fp->kprobe_override)
+		return false;
+
 	if (!array->owner_prog_type) {
 		/* There's no owner yet where we could check for
 		 * compatibility.
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 7afa92e9b409..e807bda7fe29 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -4413,6 +4413,8 @@  static int fixup_bpf_calls(struct bpf_verifier_env *env)
 			prog->dst_needed = 1;
 		if (insn->imm == BPF_FUNC_get_prandom_u32)
 			bpf_user_rnd_init_once();
+		if (insn->imm == BPF_FUNC_override_return)
+			prog->kprobe_override = 1;
 		if (insn->imm == BPF_FUNC_tail_call) {
 			/* If we tail call into other programs, we
 			 * cannot make any assumptions since they can
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 16beab4767e1..6e3862bbe9c2 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -8077,6 +8077,13 @@  static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
 		return -EINVAL;
 	}
 
+	/* Kprobe override only works for kprobes, not uprobes. */
+	if (prog->kprobe_override &&
+	    !(event->tp_event->flags & TRACE_EVENT_FL_KPROBE)) {
+		bpf_prog_put(prog);
+		return -EINVAL;
+	}
+
 	if (is_tracepoint || is_syscall_tp) {
 		int off = trace_event_get_offsets(event->tp_event);
 
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index af7dad126c13..3e6fd580fe7f 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -529,6 +529,17 @@  config FUNCTION_PROFILER
 
 	  If in doubt, say N.
 
+config BPF_KPROBE_OVERRIDE
+	bool "Enable BPF programs to override a kprobed function"
+	depends on BPF_EVENTS
+	depends on KPROBES_ON_FTRACE
+	depends on HAVE_KPROBE_OVERRIDE
+	depends on DYNAMIC_FTRACE_WITH_REGS
+	default n
+	help
+	 Allows BPF to override the execution of a probed function and
+	 set a different return value.  This is used for error injection.
+
 config FTRACE_MCOUNT_RECORD
 	def_bool y
 	depends on DYNAMIC_FTRACE
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 27d1f4ffa3de..e4bfdbc5a905 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -13,6 +13,10 @@ 
 #include <linux/filter.h>
 #include <linux/uaccess.h>
 #include <linux/ctype.h>
+#include <linux/kprobes.h>
+#include <asm/kprobes.h>
+
+#include "trace_probe.h"
 #include "trace.h"
 
 u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
@@ -76,6 +80,29 @@  unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx)
 }
 EXPORT_SYMBOL_GPL(trace_call_bpf);
 
+#ifdef CONFIG_BPF_KPROBE_OVERRIDE
+BPF_CALL_2(bpf_override_return, struct pt_regs *, regs, unsigned long, rc)
+{
+	__this_cpu_write(bpf_kprobe_override, 1);
+	regs_set_return_value(regs, rc);
+	arch_ftrace_kprobe_override_function(regs);
+	return 0;
+}
+#else
+BPF_CALL_2(bpf_override_return, struct pt_regs *, regs, unsigned long, rc)
+{
+	return -EINVAL;
+}
+#endif
+
+static const struct bpf_func_proto bpf_override_return_proto = {
+	.func		= bpf_override_return,
+	.gpl_only	= true,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_ANYTHING,
+};
+
 BPF_CALL_3(bpf_probe_read, void *, dst, u32, size, const void *, unsafe_ptr)
 {
 	int ret;
@@ -551,6 +578,8 @@  static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func
 		return &bpf_get_stackid_proto;
 	case BPF_FUNC_perf_event_read_value:
 		return &bpf_perf_event_read_value_proto;
+	case BPF_FUNC_override_return:
+		return &bpf_override_return_proto;
 	default:
 		return tracing_func_proto(func_id);
 	}
@@ -766,6 +795,15 @@  int perf_event_attach_bpf_prog(struct perf_event *event,
 	struct bpf_prog_array *new_array;
 	int ret = -EEXIST;
 
+	/*
+	 * Kprobe override only works for ftrace based kprobes, and only if they
+	 * are on the opt-in list.
+	 */
+	if (prog->kprobe_override &&
+	    (!trace_kprobe_ftrace(event->tp_event) ||
+	     !trace_kprobe_error_injectable(event->tp_event)))
+		return -EINVAL;
+
 	mutex_lock(&bpf_event_mutex);
 
 	if (event->prog)
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 492700c5fb4d..91f4b57dab82 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -42,6 +42,7 @@  struct trace_kprobe {
 	(offsetof(struct trace_kprobe, tp.args) +	\
 	(sizeof(struct probe_arg) * (n)))
 
+DEFINE_PER_CPU(int, bpf_kprobe_override);
 
 static nokprobe_inline bool trace_kprobe_is_return(struct trace_kprobe *tk)
 {
@@ -87,6 +88,27 @@  static nokprobe_inline unsigned long trace_kprobe_nhit(struct trace_kprobe *tk)
 	return nhit;
 }
 
+int trace_kprobe_ftrace(struct trace_event_call *call)
+{
+	struct trace_kprobe *tk = (struct trace_kprobe *)call->data;
+	return kprobe_ftrace(&tk->rp.kp);
+}
+
+int trace_kprobe_error_injectable(struct trace_event_call *call)
+{
+	struct trace_kprobe *tk = (struct trace_kprobe *)call->data;
+	unsigned long addr;
+
+	if (tk->symbol) {
+		addr = (unsigned long)
+			kallsyms_lookup_name(trace_kprobe_symbol(tk));
+		addr += tk->rp.kp.offset;
+	} else {
+		addr = (unsigned long)tk->rp.kp.addr;
+	}
+	return within_kprobe_error_injection_list(addr);
+}
+
 static int register_kprobe_event(struct trace_kprobe *tk);
 static int unregister_kprobe_event(struct trace_kprobe *tk);
 
@@ -1170,7 +1192,7 @@  static int kretprobe_event_define_fields(struct trace_event_call *event_call)
 #ifdef CONFIG_PERF_EVENTS
 
 /* Kprobe profile handler */
-static void
+static int
 kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs)
 {
 	struct trace_event_call *call = &tk->tp.call;
@@ -1179,12 +1201,29 @@  kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs)
 	int size, __size, dsize;
 	int rctx;
 
-	if (bpf_prog_array_valid(call) && !trace_call_bpf(call, regs))
-		return;
+	if (bpf_prog_array_valid(call)) {
+		int ret;
+
+		ret = trace_call_bpf(call, regs);
+
+		/*
+		 * We need to check and see if we modified the pc of the
+		 * pt_regs, and if so clear the kprobe and return 1 so that we
+		 * don't do the instruction skipping.  Also reset our state so
+		 * we are clean the next pass through.
+		 */
+		if (__this_cpu_read(bpf_kprobe_override)) {
+			__this_cpu_write(bpf_kprobe_override, 0);
+			reset_current_kprobe();
+			return 1;
+		}
+		if (!ret)
+			return 0;
+	}
 
 	head = this_cpu_ptr(call->perf_events);
 	if (hlist_empty(head))
-		return;
+		return 0;
 
 	dsize = __get_data_size(&tk->tp, regs);
 	__size = sizeof(*entry) + tk->tp.size + dsize;
@@ -1193,13 +1232,14 @@  kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs)
 
 	entry = perf_trace_buf_alloc(size, NULL, &rctx);
 	if (!entry)
-		return;
+		return 0;
 
 	entry->ip = (unsigned long)tk->rp.kp.addr;
 	memset(&entry[1], 0, dsize);
 	store_trace_args(sizeof(*entry), &tk->tp, regs, (u8 *)&entry[1], dsize);
 	perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs,
 			      head, NULL);
+	return 0;
 }
 NOKPROBE_SYMBOL(kprobe_perf_func);
 
@@ -1275,16 +1315,24 @@  static int kprobe_register(struct trace_event_call *event,
 static int kprobe_dispatcher(struct kprobe *kp, struct pt_regs *regs)
 {
 	struct trace_kprobe *tk = container_of(kp, struct trace_kprobe, rp.kp);
+	int ret = 0;
 
 	raw_cpu_inc(*tk->nhit);
 
 	if (tk->tp.flags & TP_FLAG_TRACE)
 		kprobe_trace_func(tk, regs);
 #ifdef CONFIG_PERF_EVENTS
-	if (tk->tp.flags & TP_FLAG_PROFILE)
-		kprobe_perf_func(tk, regs);
+	if (tk->tp.flags & TP_FLAG_PROFILE) {
+		ret = kprobe_perf_func(tk, regs);
+		/*
+		 * The ftrace kprobe handler leaves it up to us to re-enable
+		 * preemption here before returning if we've modified the ip.
+		 */
+		if (ret)
+			preempt_enable_no_resched();
+	}
 #endif
-	return 0;	/* We don't tweek kernel, so just return 0 */
+	return ret;
 }
 NOKPROBE_SYMBOL(kprobe_dispatcher);
 
diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
index fb66e3eaa192..5e54d748c84c 100644
--- a/kernel/trace/trace_probe.h
+++ b/kernel/trace/trace_probe.h
@@ -252,6 +252,8 @@  struct symbol_cache;
 unsigned long update_symbol_cache(struct symbol_cache *sc);
 void free_symbol_cache(struct symbol_cache *sc);
 struct symbol_cache *alloc_symbol_cache(const char *sym, long offset);
+int trace_kprobe_ftrace(struct trace_event_call *call);
+int trace_kprobe_error_injectable(struct trace_event_call *call);
 #else
 /* uprobes do not support symbol fetch methods */
 #define fetch_symbol_u8			NULL
@@ -277,6 +279,16 @@  alloc_symbol_cache(const char *sym, long offset)
 {
 	return NULL;
 }
+
+static inline int trace_kprobe_ftrace(struct trace_event_call *call)
+{
+	return 0;
+}
+
+static inline int trace_kprobe_error_injectable(struct trace_event_call *call)
+{
+	return 0;
+}
 #endif /* CONFIG_KPROBE_EVENTS */
 
 struct probe_arg {