
arm64: kprobes: Use BRK instead of single-step when executing instructions out-of-line

Message ID 20201029173440.117174-1-jean-philippe@linaro.org
State New, archived
Series arm64: kprobes: Use BRK instead of single-step when executing instructions out-of-line

Commit Message

Jean-Philippe Brucker Oct. 29, 2020, 5:34 p.m. UTC
Commit 36dadef23fcc ("kprobes: Init kprobes in early_initcall") enabled
using kprobes from early_initcall. Unfortunately at this point the
hardware debug infrastructure is not operational. The OS lock may still
be locked, and the hardware watchpoints may have unknown values when
kprobe enables debug monitors to single-step instructions.

Rather than using hardware single-step, append a BRK instruction after
the instruction to be executed out-of-line.

Fixes: 36dadef23fcc ("kprobes: Init kprobes in early_initcall")
Suggested-by: Will Deacon <will@kernel.org>
Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
---

An alternative to the fix in [1]. I haven't converted uprobes yet since
they don't suffer from the same problem, but I could add it to the bottom
of my list. Lightly tested with the kprobe smoke test and the BPF
selftests. Interestingly, on Seattle, when running the "overhead" BPF
selftest, which triggers a kprobe a bunch of times, I see a 20-30%
improvement with this patch. I'm guessing it's the cost of touching the
debug sysregs?

[1] https://lore.kernel.org/linux-arm-kernel/20201026172907.1468294-1-jean-philippe@linaro.org/
---
 arch/arm64/include/asm/brk-imm.h        |  2 +
 arch/arm64/include/asm/debug-monitors.h |  1 +
 arch/arm64/include/asm/kprobes.h        |  2 +-
 arch/arm64/kernel/probes/kprobes.c      | 56 +++++++++----------------
 4 files changed, 24 insertions(+), 37 deletions(-)
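
For reference, a minimal sketch of the resulting slot layout and the BRK
encoding (illustrative only; the AARCH64_BREAK_MON value is taken from the
arm64 headers and is an assumption here, not part of this patch):

	/*
	 * Slot prepared by arch_prepare_ss_slot(), now MAX_INSN_SIZE == 2:
	 *
	 *   insn[0] = p->opcode                // copy of the probed instruction
	 *   insn[1] = BRK64_OPCODE_KPROBES_SS  // traps back into the kprobe core
	 *
	 * BRK #imm16 places the immediate in bits [20:5] of the instruction:
	 */
	#define AARCH64_BREAK_MON	0xd4200000	/* BRK #0 */
	#define BRK64_OPCODE_KPROBES_SS \
		(AARCH64_BREAK_MON | (KPROBES_BRK_SS_IMM << 5))	/* 0xd42000c0 */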

Comments

Masami Hiramatsu (Google) Oct. 29, 2020, 11:34 p.m. UTC | #1
On Thu, 29 Oct 2020 18:34:42 +0100
Jean-Philippe Brucker <jean-philippe@linaro.org> wrote:

> Commit 36dadef23fcc ("kprobes: Init kprobes in early_initcall") enabled
> using kprobes from early_initcall. Unfortunately at this point the
> hardware debug infrastructure is not operational. The OS lock may still
> be locked, and the hardware watchpoints may have unknown values when
> kprobe enables debug monitors to single-step instructions.
> 
> Rather than using hardware single-step, append a BRK instruction after
> the instruction to be executed out-of-line.

OK, this looks good to me. One comment: we could remove ss_ctx too. ss_ctx
holds an ss_pending bit and a match_addr, which are redundant since they
can be replaced by KPROBE_HIT_SS and &cur_kprobe->ainsn.api.insn[1]
respectively. But that should be done in another patch; as a fix for the
bug, this is good.
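
A rough sketch of that follow-up cleanup (hypothetical, not part of this
patch): the BRK handler could derive "single-step pending" from the per-CPU
kprobe state and the slot address directly:

	static bool __kprobes kprobe_ss_hit(struct kprobe_ctlblk *kcb,
					    unsigned long addr)
	{
		struct kprobe *cur = kprobe_running();

		/* The BRK is the second word of the out-of-line slot */
		return cur &&
		       (kcb->kprobe_status == KPROBE_HIT_SS ||
			kcb->kprobe_status == KPROBE_REENTER) &&
		       addr == (unsigned long)&cur->ainsn.api.insn[1];
	}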

Acked-by: Masami Hiramatsu <mhiramat@kernel.org>

> Fixes: 36dadef23fcc ("kprobes: Init kprobes in early_initcall")
> Suggested-by: Will Deacon <will@kernel.org>
> Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
> ---
> 
> An alternative to the fix in [1]. I haven't converted uprobes yet since
> they don't suffer from the same problem, but I could add it to the bottom
> of my list. Lightly tested with the kprobe smoke test and the BPF
> selftests. Interestingly, on Seattle, when running the "overhead" BPF
> selftest, which triggers a kprobe a bunch of times, I see a 20-30%
> improvement with this patch. I'm guessing it's the cost of touching the
> debug sysregs?

Interesting :) I think that is the cost of the debug exception itself. I
guess there may also be a cost in routing through the debug logic.

Thank you,

> [1] https://lore.kernel.org/linux-arm-kernel/20201026172907.1468294-1-jean-philippe@linaro.org/
Will Deacon Oct. 30, 2020, 4:06 p.m. UTC | #2
On Thu, Oct 29, 2020 at 06:34:42PM +0100, Jean-Philippe Brucker wrote:
> Commit 36dadef23fcc ("kprobes: Init kprobes in early_initcall") enabled
> using kprobes from early_initcall. Unfortunately at this point the
> hardware debug infrastructure is not operational. The OS lock may still
> be locked, and the hardware watchpoints may have unknown values when
> kprobe enables debug monitors to single-step instructions.
> 
> Rather than using hardware single-step, append a BRK instruction after
> the instruction to be executed out-of-line.
> [...]

Thanks! I'm about to send fixes for -rc2, but I'd like this to get a little
bit of time in linux-next, so I aim to queue it next week for -rc3. Just
wanted to make sure you don't think I'm ignoring you.

Cheers,

Will
Jean-Philippe Brucker Oct. 30, 2020, 4:13 p.m. UTC | #3
On Fri, Oct 30, 2020 at 04:06:24PM +0000, Will Deacon wrote:
> On Thu, Oct 29, 2020 at 06:34:42PM +0100, Jean-Philippe Brucker wrote:
> > [...]
> 
> Thanks! I'm about to send fixes for -rc2, but I'd like this to get a little
> bit of time in linux-next, so I aim to queue it next week for -rc3. Just
> wanted to make sure you don't think I'm ignoring you.

No worries, I'm more comfortable having this sit in linux-next.

Thanks,
Jean
Will Deacon Nov. 2, 2020, 11:41 a.m. UTC | #4
On Thu, Oct 29, 2020 at 06:34:42PM +0100, Jean-Philippe Brucker wrote:
> [...]
> diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c
> index deba738142ed..ec1446ceacc9 100644
> --- a/arch/arm64/kernel/probes/kprobes.c
> +++ b/arch/arm64/kernel/probes/kprobes.c
> @@ -36,21 +36,25 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
>  static void __kprobes
>  post_kprobe_handler(struct kprobe_ctlblk *, struct pt_regs *);
>  
> -static int __kprobes patch_text(kprobe_opcode_t *addr, u32 opcode)
> +static int __kprobes patch_text(kprobe_opcode_t *addr, u32 opc1, u32 opc2)
>  {
> -	void *addrs[1];
> -	u32 insns[1];
> +	void *addrs[2];
> +	u32 insns[2];
>  
>  	addrs[0] = addr;
> -	insns[0] = opcode;
> +	insns[0] = opc1;
> +	if (opc2) {
> +		addrs[1] = addr + 1;
> +		insns[1] = opc2;
> +	}
>  
> -	return aarch64_insn_patch_text(addrs, insns, 1);
> +	return aarch64_insn_patch_text(addrs, insns, opc2 ? 2 : 1);
>  }
>  
>  static void __kprobes arch_prepare_ss_slot(struct kprobe *p)
>  {
>  	/* prepare insn slot */
> -	patch_text(p->ainsn.api.insn, p->opcode);
> +	patch_text(p->ainsn.api.insn, p->opcode, BRK64_OPCODE_KPROBES_SS);
>  
>  	flush_icache_range((uintptr_t) (p->ainsn.api.insn),
>  			   (uintptr_t) (p->ainsn.api.insn) +
> @@ -128,13 +132,13 @@ void *alloc_insn_page(void)
>  /* arm kprobe: install breakpoint in text */
>  void __kprobes arch_arm_kprobe(struct kprobe *p)
>  {
> -	patch_text(p->addr, BRK64_OPCODE_KPROBES);
> +	patch_text(p->addr, BRK64_OPCODE_KPROBES, 0);
>  }
>  
>  /* disarm kprobe: remove breakpoint from text */
>  void __kprobes arch_disarm_kprobe(struct kprobe *p)
>  {
> -	patch_text(p->addr, p->opcode);
> +	patch_text(p->addr, p->opcode, 0);

I don't think patch_text() is offering an awful lot to these callers. Why
don't we just call aarch64_insn_patch_text() directly, which I think will
make the code easier to read too?
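
A minimal sketch of what that could look like (hypothetical, assuming the
existing aarch64_insn_patch_text() signature; roughly the direction a v2
could take):

	/* arch_prepare_ss_slot(): patch both slot words in one call */
	void *addrs[] = { p->ainsn.api.insn, p->ainsn.api.insn + 1 };
	u32 insns[] = { p->opcode, BRK64_OPCODE_KPROBES_SS };

	aarch64_insn_patch_text(addrs, insns, 2);

	/* arch_arm_kprobe() / arch_disarm_kprobe(): single-word patches */
	aarch64_insn_patch_text((void *[]){ p->addr },
				(u32 []){ BRK64_OPCODE_KPROBES }, 1);
	aarch64_insn_patch_text((void *[]){ p->addr },
				(u32 []){ p->opcode }, 1);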

>  void __kprobes arch_remove_kprobe(struct kprobe *p)
> @@ -163,20 +167,14 @@ static void __kprobes set_current_kprobe(struct kprobe *p)
>  }
>  
>  /*
> - * Interrupts need to be disabled before single-step mode is set, and not
> - * reenabled until after single-step mode ends.
> - * Without disabling interrupt on local CPU, there is a chance of
> - * interrupt occurrence in the period of exception return and  start of
> - * out-of-line single-step, that result in wrongly single stepping
> - * into the interrupt handler.
> + * Keep interrupts disabled while executing the instruction out-of-line. Since
> + * the kprobe state is per-CPU, we can't afford to be migrated.
>   */
>  static void __kprobes kprobes_save_local_irqflag(struct kprobe_ctlblk *kcb,
>  						struct pt_regs *regs)
>  {
>  	kcb->saved_irqflag = regs->pstate & DAIF_MASK;
>  	regs->pstate |= PSR_I_BIT;
> -	/* Unmask PSTATE.D for enabling software step exceptions. */
> -	regs->pstate &= ~PSR_D_BIT;

Can we set the D bit now? Would be one less thing to worry about.

Cheers,

Will
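
A sketch of that suggestion (hypothetical; masks all of DAIF while the
out-of-line slot runs, as later agreed in this thread):

	static void __kprobes kprobes_save_local_irqflag(struct kprobe_ctlblk *kcb,
							struct pt_regs *regs)
	{
		kcb->saved_irqflag = regs->pstate & DAIF_MASK;
		/* Mask D, A, I and F while executing the slot */
		regs->pstate |= DAIF_MASK;
	}
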
Masami Hiramatsu (Google) Nov. 2, 2020, 1:52 p.m. UTC | #5
On Mon, 2 Nov 2020 11:41:52 +0000
Will Deacon <will@kernel.org> wrote:

> On Thu, Oct 29, 2020 at 06:34:42PM +0100, Jean-Philippe Brucker wrote:
> > [...]
> > diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c
> > index deba738142ed..ec1446ceacc9 100644
> > --- a/arch/arm64/kernel/probes/kprobes.c
> > +++ b/arch/arm64/kernel/probes/kprobes.c
> > @@ -36,21 +36,25 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
> >  static void __kprobes
> >  post_kprobe_handler(struct kprobe_ctlblk *, struct pt_regs *);
> >  
> > -static int __kprobes patch_text(kprobe_opcode_t *addr, u32 opcode)
> > +static int __kprobes patch_text(kprobe_opcode_t *addr, u32 opc1, u32 opc2)
> >  {
> > -	void *addrs[1];
> > -	u32 insns[1];
> > +	void *addrs[2];
> > +	u32 insns[2];
> >  
> >  	addrs[0] = addr;
> > -	insns[0] = opcode;
> > +	insns[0] = opc1;
> > +	if (opc2) {
> > +		addrs[1] = addr + 1;
> > +		insns[1] = opc2;
> > +	}
> >  
> > -	return aarch64_insn_patch_text(addrs, insns, 1);
> > +	return aarch64_insn_patch_text(addrs, insns, opc2 ? 2 : 1);
> >  }
> >  
> >  static void __kprobes arch_prepare_ss_slot(struct kprobe *p)
> >  {
> >  	/* prepare insn slot */
> > -	patch_text(p->ainsn.api.insn, p->opcode);
> > +	patch_text(p->ainsn.api.insn, p->opcode, BRK64_OPCODE_KPROBES_SS);
> >  
> >  	flush_icache_range((uintptr_t) (p->ainsn.api.insn),
> >  			   (uintptr_t) (p->ainsn.api.insn) +
> > @@ -128,13 +132,13 @@ void *alloc_insn_page(void)
> >  /* arm kprobe: install breakpoint in text */
> >  void __kprobes arch_arm_kprobe(struct kprobe *p)
> >  {
> > -	patch_text(p->addr, BRK64_OPCODE_KPROBES);
> > +	patch_text(p->addr, BRK64_OPCODE_KPROBES, 0);
> >  }
> >  
> >  /* disarm kprobe: remove breakpoint from text */
> >  void __kprobes arch_disarm_kprobe(struct kprobe *p)
> >  {
> > -	patch_text(p->addr, p->opcode);
> > +	patch_text(p->addr, p->opcode, 0);
> 
> I don't think patch_text() is offering an awful lot to these callers. Why
> don't we just call aarch64_insn_patch_text() directly, which I think will
> make the code easier to read too?

Agreed. I guess it's just there for historical reasons.

> >  void __kprobes arch_remove_kprobe(struct kprobe *p)
> > @@ -163,20 +167,14 @@ static void __kprobes set_current_kprobe(struct kprobe *p)
> >  }
> >  
> >  /*
> > - * Interrupts need to be disabled before single-step mode is set, and not
> > - * reenabled until after single-step mode ends.
> > - * Without disabling interrupt on local CPU, there is a chance of
> > - * interrupt occurrence in the period of exception return and  start of
> > - * out-of-line single-step, that result in wrongly single stepping
> > - * into the interrupt handler.
> > + * Keep interrupts disabled while executing the instruction out-of-line. Since
> > + * the kprobe state is per-CPU, we can't afford to be migrated.
> >   */
> >  static void __kprobes kprobes_save_local_irqflag(struct kprobe_ctlblk *kcb,
> >  						struct pt_regs *regs)
> >  {
> >  	kcb->saved_irqflag = regs->pstate & DAIF_MASK;
> >  	regs->pstate |= PSR_I_BIT;
> > -	/* Unmask PSTATE.D for enabling software step exceptions. */
> > -	regs->pstate &= ~PSR_D_BIT;
> 
> Can we set the D bit now? Would be one less thing to worry about.

Hmm, I think we can just ignore the D bit above. If someone (kgdb?) has set
the D flag, there may be no reason to prohibit it on the trampoline. (Of
course they must handle this case.)

Thank you,
Jean-Philippe Brucker Nov. 3, 2020, 9:13 a.m. UTC | #6
On Mon, Nov 02, 2020 at 10:52:55PM +0900, Masami Hiramatsu wrote:
> > > -	patch_text(p->addr, p->opcode);
> > > +	patch_text(p->addr, p->opcode, 0);
> > 
> > I don't think patch_text() is offering an awful lot to these callers. Why
> > don't we just call aarch64_insn_patch_text() directly, which I think will
> > make the code easier to read too?
> 
> Agreed. I guess it's just there for historical reasons.

Yes, it's easy to remove; sending a v2.

> 
> > >  void __kprobes arch_remove_kprobe(struct kprobe *p)
> > > @@ -163,20 +167,14 @@ static void __kprobes set_current_kprobe(struct kprobe *p)
> > >  }
> > >  
> > >  /*
> > > - * Interrupts need to be disabled before single-step mode is set, and not
> > > - * reenabled until after single-step mode ends.
> > > - * Without disabling interrupt on local CPU, there is a chance of
> > > - * interrupt occurrence in the period of exception return and  start of
> > > - * out-of-line single-step, that result in wrongly single stepping
> > > - * into the interrupt handler.
> > > + * Keep interrupts disabled while executing the instruction out-of-line. Since
> > > + * the kprobe state is per-CPU, we can't afford to be migrated.
> > >   */
> > >  static void __kprobes kprobes_save_local_irqflag(struct kprobe_ctlblk *kcb,
> > >  						struct pt_regs *regs)
> > >  {
> > >  	kcb->saved_irqflag = regs->pstate & DAIF_MASK;
> > >  	regs->pstate |= PSR_I_BIT;
> > > -	/* Unmask PSTATE.D for enabling software step exceptions. */
> > > -	regs->pstate &= ~PSR_D_BIT;
> > 
> > Can we set the D bit now? Would be one less thing to worry about.
> 
> > Hmm, I think we can just ignore the D bit above. If someone (kgdb?) has
> > set the D flag, there may be no reason to prohibit it on the trampoline.
> > (Of course they must handle this case.)

Masking debug just for the trampoline would simplify reasoning about nested
exceptions. Although it prevents debugging kprobes with KGDB, that seems
reasonable to me because I don't think we can guarantee that KGDB works
reliably with kprobes. For example, it doesn't seem like a watchpoint would
fire if the instruction performing the access is simulated by a kprobe.

Thanks,
Jean
Will Deacon Nov. 3, 2020, 9:23 a.m. UTC | #7
On Tue, Nov 03, 2020 at 10:13:05AM +0100, Jean-Philippe Brucker wrote:
> On Mon, Nov 02, 2020 at 10:52:55PM +0900, Masami Hiramatsu wrote:
> [...]
> > > Can we set the D bit now? Would be one less thing to worry about.
> > 
> > Hmm, I think we can just ignore the D bit above. If someone (kgdb?) has
> > set the D flag, there may be no reason to prohibit it on the trampoline.
> > (Of course they must handle this case.)
> 
> Masking debug just for the trampoline would simplify reasoning about nested
> exceptions. Although it prevents debugging kprobes with KGDB, that seems
> reasonable to me because I don't think we can guarantee that KGDB works
> reliably with kprobes. For example, it doesn't seem like a watchpoint would
> fire if the instruction performing the access is simulated by a kprobe.

Yes, let's just set all of DAIF during the trampoline. Also, while I've got
you: if you get a chance, I'd appreciate any feedback on my proposal for
reworking our debug exception handling altogether:

https://lore.kernel.org/r/20200626095551.GA9312@willie-the-truck

Will
Jean-Philippe Brucker Nov. 17, 2020, 5:28 p.m. UTC | #8
On Tue, Nov 03, 2020 at 09:23:16AM +0000, Will Deacon wrote:
> Yes, let's just set all of DAIF during the trampoline. Also, while I've got
> you: if you get a chance, I'd appreciate any feedback on my proposal for
> reworking our debug exception handling altogether:
> 
> https://lore.kernel.org/r/20200626095551.GA9312@willie-the-truck

Well, I stared at this for a while... It looks fine to me, but I don't
have a full picture of the trap infrastructure (not sure whether you were
asking me). I could help with testing if you get around to reworking it.

> On taking an interrupt from EL1, stash MDSCR_EL1.SS in a pcpu variable and
> clear the register bit if it was set. Then unmask only D and leave I set. On
> return from the exception, set D and restore MDSCR_EL1.SS. If we decide to
> reschedule, unmask D (i.e. we only step into interrupts if we need a
> reschedule. Alternatively, we could skip the reschedule if we were
> stepping.)

Any specific reason to treat reschedule differently, or is it just to keep
things simple? I'm asking because this could be a problem with the current
code: when taking an interrupt while single-stepping at EL1, we keep
MDSCR_EL1.SS set and unmask D before calling the IRQ handler. The step
exception might only be taken after the next context synchronization event
(on QEMU it happens at an isb in the timer handler). If the IRQ handler
doesn't happen to do any context synchronization and we reschedule, I guess
the step exception could happen after the next eret?

Thanks,
Jean
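
For concreteness, a sketch of the pcpu stash in Will's proposal
(hypothetical; the helper names are made up, and DBG_MDSCR_SS is assumed to
be the MDSCR_EL1.SS bit from asm/debug-monitors.h):

	static DEFINE_PER_CPU(u64, saved_mdscr_ss);

	/* On taking an IRQ from EL1: stash and clear MDSCR_EL1.SS */
	static void el1_irq_stash_ss(void)
	{
		u64 mdscr = read_sysreg(mdscr_el1);

		__this_cpu_write(saved_mdscr_ss, mdscr & DBG_MDSCR_SS);
		write_sysreg(mdscr & ~DBG_MDSCR_SS, mdscr_el1);
	}

	/* On exception return (with D set): restore the stashed bit */
	static void el1_irq_restore_ss(void)
	{
		write_sysreg(read_sysreg(mdscr_el1) |
			     __this_cpu_read(saved_mdscr_ss), mdscr_el1);
	}
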
Masami Hiramatsu (Google) Nov. 24, 2020, 2:34 a.m. UTC | #9
On Tue, 17 Nov 2020 18:28:12 +0100
Jean-Philippe Brucker <jean-philippe@linaro.org> wrote:

> On Tue, Nov 03, 2020 at 09:23:16AM +0000, Will Deacon wrote:
> > Yes, let's just set all of DAIF during the trampoline. Also, while I've got
> > you: if you get a chance, I'd appreciate any feedback on my proposal for
> > reworking our debug exception handling altogether:
> > 
> > https://lore.kernel.org/r/20200626095551.GA9312@willie-the-truck
> 
> Well, I stared at this for a while... It looks fine to me, but I don't
> have a full picture of the trap infrastructure (not sure whether you were
> asking me). I could help with testing if you get around to reworking it.
> 
> > On taking an interrupt from EL1, stash MDSCR_EL1.SS in a pcpu variable and
> > clear the register bit if it was set. Then unmask only D and leave I set. On
> > return from the exception, set D and restore MDSCR_EL1.SS. If we decide to
> > reschedule, unmask D (i.e. we only step into interrupts if we need a
> > reschedule. Alternatively, we could skip the reschedule if we were
> > stepping.)
> 
> Any specific reason to treat reschedule differently, or is it just to keep
> things simple? I'm asking because this could be a problem with the current
> code: when taking an interrupt while single-stepping at EL1, we keep
> MDSCR_EL1.SS set and unmask D before calling the IRQ handler. The step
> exception might only be taken after the next context synchronization event
> (on QEMU it happens at an isb in the timer handler). If the IRQ handler
> doesn't happen to do any context synchronization and we reschedule, I guess
> the step exception could happen after the next eret?

Will, would you have any comment on this?

I guess that if SS is enabled at EL1 together with kprobes, this may happen
(SS means a step exception is taken):

[orig-code(SS)]->[1st BRK](D set)->[copied insn(!SS)]->[2nd BRK](D unmask)->[orig-code(SS)]

Whereas if we unmask the D flag (the current code), this may happen:

[orig-code(SS)]->[1st BRK](D unmask)->[copied insn(SS)]->[2nd BRK](D unmask)->[orig-code(SS)]

But as you pointed out, this may make things complex. (The debugger may also
need to care about this case: because the single-stepped address is far from
the original code, it may not be able to find the correct source line.)

So keeping the D flag set in this phase looks good to me, until we sort out
the debugger-side support.

Thank you,

Patch

diff --git a/arch/arm64/include/asm/brk-imm.h b/arch/arm64/include/asm/brk-imm.h
index e3d47b52161d..ec7720dbe2c8 100644
--- a/arch/arm64/include/asm/brk-imm.h
+++ b/arch/arm64/include/asm/brk-imm.h
@@ -10,6 +10,7 @@ 
  * #imm16 values used for BRK instruction generation
  * 0x004: for installing kprobes
  * 0x005: for installing uprobes
+ * 0x006: for kprobe software single-step
  * Allowed values for kgdb are 0x400 - 0x7ff
  * 0x100: for triggering a fault on purpose (reserved)
  * 0x400: for dynamic BRK instruction
@@ -19,6 +20,7 @@ 
  */
 #define KPROBES_BRK_IMM			0x004
 #define UPROBES_BRK_IMM			0x005
+#define KPROBES_BRK_SS_IMM		0x006
 #define FAULT_BRK_IMM			0x100
 #define KGDB_DYN_DBG_BRK_IMM		0x400
 #define KGDB_COMPILED_DBG_BRK_IMM	0x401
diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h
index 0b298f48f5bf..657c921fd784 100644
--- a/arch/arm64/include/asm/debug-monitors.h
+++ b/arch/arm64/include/asm/debug-monitors.h
@@ -53,6 +53,7 @@ 
 
 /* kprobes BRK opcodes with ESR encoding  */
 #define BRK64_OPCODE_KPROBES	(AARCH64_BREAK_MON | (KPROBES_BRK_IMM << 5))
+#define BRK64_OPCODE_KPROBES_SS	(AARCH64_BREAK_MON | (KPROBES_BRK_SS_IMM << 5))
 /* uprobes BRK opcodes with ESR encoding  */
 #define BRK64_OPCODE_UPROBES	(AARCH64_BREAK_MON | (UPROBES_BRK_IMM << 5))
 
diff --git a/arch/arm64/include/asm/kprobes.h b/arch/arm64/include/asm/kprobes.h
index 97e511d645a2..8699ce30f587 100644
--- a/arch/arm64/include/asm/kprobes.h
+++ b/arch/arm64/include/asm/kprobes.h
@@ -16,7 +16,7 @@ 
 #include <linux/percpu.h>
 
 #define __ARCH_WANT_KPROBES_INSN_SLOT
-#define MAX_INSN_SIZE			1
+#define MAX_INSN_SIZE			2
 
 #define flush_insn_slot(p)		do { } while (0)
 #define kretprobe_blacklist_size	0
diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c
index deba738142ed..ec1446ceacc9 100644
--- a/arch/arm64/kernel/probes/kprobes.c
+++ b/arch/arm64/kernel/probes/kprobes.c
@@ -36,21 +36,25 @@  DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
 static void __kprobes
 post_kprobe_handler(struct kprobe_ctlblk *, struct pt_regs *);
 
-static int __kprobes patch_text(kprobe_opcode_t *addr, u32 opcode)
+static int __kprobes patch_text(kprobe_opcode_t *addr, u32 opc1, u32 opc2)
 {
-	void *addrs[1];
-	u32 insns[1];
+	void *addrs[2];
+	u32 insns[2];
 
 	addrs[0] = addr;
-	insns[0] = opcode;
+	insns[0] = opc1;
+	if (opc2) {
+		addrs[1] = addr + 1;
+		insns[1] = opc2;
+	}
 
-	return aarch64_insn_patch_text(addrs, insns, 1);
+	return aarch64_insn_patch_text(addrs, insns, opc2 ? 2 : 1);
 }
 
 static void __kprobes arch_prepare_ss_slot(struct kprobe *p)
 {
 	/* prepare insn slot */
-	patch_text(p->ainsn.api.insn, p->opcode);
+	patch_text(p->ainsn.api.insn, p->opcode, BRK64_OPCODE_KPROBES_SS);
 
 	flush_icache_range((uintptr_t) (p->ainsn.api.insn),
 			   (uintptr_t) (p->ainsn.api.insn) +
@@ -128,13 +132,13 @@  void *alloc_insn_page(void)
 /* arm kprobe: install breakpoint in text */
 void __kprobes arch_arm_kprobe(struct kprobe *p)
 {
-	patch_text(p->addr, BRK64_OPCODE_KPROBES);
+	patch_text(p->addr, BRK64_OPCODE_KPROBES, 0);
 }
 
 /* disarm kprobe: remove breakpoint from text */
 void __kprobes arch_disarm_kprobe(struct kprobe *p)
 {
-	patch_text(p->addr, p->opcode);
+	patch_text(p->addr, p->opcode, 0);
 }
 
 void __kprobes arch_remove_kprobe(struct kprobe *p)
@@ -163,20 +167,14 @@  static void __kprobes set_current_kprobe(struct kprobe *p)
 }
 
 /*
- * Interrupts need to be disabled before single-step mode is set, and not
- * reenabled until after single-step mode ends.
- * Without disabling interrupt on local CPU, there is a chance of
- * interrupt occurrence in the period of exception return and  start of
- * out-of-line single-step, that result in wrongly single stepping
- * into the interrupt handler.
+ * Keep interrupts disabled while executing the instruction out-of-line. Since
+ * the kprobe state is per-CPU, we can't afford to be migrated.
  */
 static void __kprobes kprobes_save_local_irqflag(struct kprobe_ctlblk *kcb,
 						struct pt_regs *regs)
 {
 	kcb->saved_irqflag = regs->pstate & DAIF_MASK;
 	regs->pstate |= PSR_I_BIT;
-	/* Unmask PSTATE.D for enabling software step exceptions. */
-	regs->pstate &= ~PSR_D_BIT;
 }
 
 static void __kprobes kprobes_restore_local_irqflag(struct kprobe_ctlblk *kcb,
@@ -219,10 +217,7 @@  static void __kprobes setup_singlestep(struct kprobe *p,
 		slot = (unsigned long)p->ainsn.api.insn;
 
 		set_ss_context(kcb, slot);	/* mark pending ss */
-
-		/* IRQs and single stepping do not mix well. */
 		kprobes_save_local_irqflag(kcb, regs);
-		kernel_enable_single_step(regs);
 		instruction_pointer_set(regs, slot);
 	} else {
 		/* insn simulation */
@@ -273,12 +268,8 @@  post_kprobe_handler(struct kprobe_ctlblk *kcb, struct pt_regs *regs)
 	}
 	/* call post handler */
 	kcb->kprobe_status = KPROBE_HIT_SSDONE;
-	if (cur->post_handler)	{
-		/* post_handler can hit breakpoint and single step
-		 * again, so we enable D-flag for recursive exception.
-		 */
+	if (cur->post_handler)
 		cur->post_handler(cur, regs, 0);
-	}
 
 	reset_current_kprobe();
 }
@@ -302,8 +293,6 @@  int __kprobes kprobe_fault_handler(struct pt_regs *regs, unsigned int fsr)
 		if (!instruction_pointer(regs))
 			BUG();
 
-		kernel_disable_single_step();
-
 		if (kcb->kprobe_status == KPROBE_REENTER)
 			restore_previous_kprobe(kcb);
 		else
@@ -365,10 +354,6 @@  static void __kprobes kprobe_handler(struct pt_regs *regs)
 			 * pre-handler and it returned non-zero, it will
 			 * modify the execution path and no need to single
 			 * stepping. Let's just reset current kprobe and exit.
-			 *
-			 * pre_handler can hit a breakpoint and can step thru
-			 * before return, keep PSTATE D-flag enabled until
-			 * pre_handler return back.
 			 */
 			if (!p->pre_handler || !p->pre_handler(p, regs)) {
 				setup_singlestep(p, regs, kcb, 0);
@@ -399,7 +384,7 @@  kprobe_ss_hit(struct kprobe_ctlblk *kcb, unsigned long addr)
 }
 
 static int __kprobes
-kprobe_single_step_handler(struct pt_regs *regs, unsigned int esr)
+kprobe_breakpoint_ss_handler(struct pt_regs *regs, unsigned int esr)
 {
 	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
 	int retval;
@@ -409,16 +394,15 @@  kprobe_single_step_handler(struct pt_regs *regs, unsigned int esr)
 
 	if (retval == DBG_HOOK_HANDLED) {
 		kprobes_restore_local_irqflag(kcb, regs);
-		kernel_disable_single_step();
-
 		post_kprobe_handler(kcb, regs);
 	}
 
 	return retval;
 }
 
-static struct step_hook kprobes_step_hook = {
-	.fn = kprobe_single_step_handler,
+static struct break_hook kprobes_break_ss_hook = {
+	.imm = KPROBES_BRK_SS_IMM,
+	.fn = kprobe_breakpoint_ss_handler,
 };
 
 static int __kprobes
@@ -486,7 +470,7 @@  int __kprobes arch_trampoline_kprobe(struct kprobe *p)
 int __init arch_init_kprobes(void)
 {
 	register_kernel_break_hook(&kprobes_break_hook);
-	register_kernel_step_hook(&kprobes_step_hook);
+	register_kernel_break_hook(&kprobes_break_ss_hook);
 
 	return 0;
 }
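
As a quick usage illustration (hypothetical test module, not part of the
patch; the probed symbol is only an example), a pre-handler that returns 0
sends the probed instruction through the new BRK-terminated slot:

	#include <linux/module.h>
	#include <linux/kprobes.h>

	static int test_pre(struct kprobe *p, struct pt_regs *regs)
	{
		/* Returning 0 lets the core single-step out-of-line */
		return 0;
	}

	static struct kprobe kp = {
		.symbol_name	= "do_sys_open",	/* example target */
		.pre_handler	= test_pre,
	};

	static int __init kp_test_init(void)
	{
		return register_kprobe(&kp);
	}

	static void __exit kp_test_exit(void)
	{
		unregister_kprobe(&kp);
	}

	module_init(kp_test_init);
	module_exit(kp_test_exit);
	MODULE_LICENSE("GPL");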