diff mbox series

hw_breakpoint: fix single-stepping when using bpf_overflow_handler

Message ID 20220923203644.2731604-1-tnovak@fb.com (mailing list archive)
State New, archived
Delegated to: BPF
Headers show
Series hw_breakpoint: fix single-stepping when using bpf_overflow_handler | expand

Checks

Context Check Description
netdev/tree_selection success Not a local patch
bpf/vmtest-bpf-next-PR success PR summary
bpf/vmtest-bpf-next-VM_Test-2 success Logs for build for x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-3 success Logs for build for x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-1 success Logs for build for s390x with gcc
bpf/vmtest-bpf-next-VM_Test-15 success Logs for test_verifier on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-16 success Logs for test_verifier on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-17 success Logs for test_verifier on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-7 success Logs for test_maps on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-12 success Logs for test_progs_no_alu32 on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-13 success Logs for test_progs_no_alu32 on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-14 success Logs for test_progs_no_alu32 on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-6 success Logs for test_maps on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-8 success Logs for test_maps on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-10 success Logs for test_progs on x86_64 with gcc
bpf/vmtest-bpf-next-VM_Test-11 success Logs for test_progs on x86_64 with llvm-16
bpf/vmtest-bpf-next-VM_Test-9 success Logs for test_progs on s390x with gcc
bpf/vmtest-bpf-next-VM_Test-4 success Logs for llvm-toolchain
bpf/vmtest-bpf-next-VM_Test-5 success Logs for set-matrix

Commit Message

Tomislav Novak Sept. 23, 2022, 8:36 p.m. UTC
On ARM platforms is_default_overflow_handler() is used to determine if
hw_breakpoint code should single-step over the watchpoint trigger or
let the custom handler deal with it.

Attaching a BPF program to a watchpoint replaces the handler with
bpf_overflow_handler, which isn't recognized as a default handler so we
never step over the instruction triggering the data abort exception (the
watchpoint keeps firing):

  # bpftrace -e 'watchpoint:0x10000000:4:w { printf("hit\n"); }' ./wp_test
  Attaching 1 probe...
  hit
  hit
  hit
  [...]

(wp_test performs a single 4-byte store to address 0x10000000)

This patch replaces the check with uses_default_overflow_handler(), which
accounts for the bpf_overflow_handler() case by also testing if the handler
invokes one of the perf_event_output functions via orig_default_handler.

Signed-off-by: Tomislav Novak <tnovak@fb.com>
Tested-by: Samuel Gosselin <sgosselin@fb.com> # arm64
---
 arch/arm/kernel/hw_breakpoint.c   |  8 ++++----
 arch/arm64/kernel/hw_breakpoint.c |  4 ++--
 include/linux/perf_event.h        | 22 +++++++++++++++++++---
 3 files changed, 25 insertions(+), 9 deletions(-)

Comments

Tomislav Novak Oct. 12, 2022, 3:40 p.m. UTC | #1
On Fri, Sep 23, 2022 at 01:36:45PM -0700, Tomislav Novak wrote:
> On ARM platforms is_default_overflow_handler() is used to determine if
> hw_breakpoint code should single-step over the watchpoint trigger or
> let the custom handler deal with it.
> 
> Attaching a BPF program to a watchpoint replaces the handler with
> bpf_overflow_handler, which isn't recognized as a default handler so we
> never step over the instruction triggering the data abort exception (the
> watchpoint keeps firing):
> 
>   # bpftrace -e 'watchpoint:0x10000000:4:w { printf("hit\n"); }' ./wp_test
>   Attaching 1 probe...
>   hit
>   hit
>   hit
>   [...]
> 
> (wp_test performs a single 4-byte store to address 0x10000000)

Adding a few more people (per MAINTAINERS), specifically for arch/arm{,64}
(other targets don't have this issue AFAICT).

> This patch replaces the check with uses_default_overflow_handler(), which
> accounts for the bpf_overflow_handler() case by also testing if the handler
> invokes one of the perf_event_output functions via orig_default_handler.
> 
> Signed-off-by: Tomislav Novak <tnovak@fb.com>
> Tested-by: Samuel Gosselin <sgosselin@fb.com> # arm64
> ---
>  arch/arm/kernel/hw_breakpoint.c   |  8 ++++----
>  arch/arm64/kernel/hw_breakpoint.c |  4 ++--
>  include/linux/perf_event.h        | 22 +++++++++++++++++++---
>  3 files changed, 25 insertions(+), 9 deletions(-)
> 
> diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c
> index 054e9199f30d..dc0fb7a81371 100644
> --- a/arch/arm/kernel/hw_breakpoint.c
> +++ b/arch/arm/kernel/hw_breakpoint.c
> @@ -626,7 +626,7 @@ int hw_breakpoint_arch_parse(struct perf_event *bp,
>  	hw->address &= ~alignment_mask;
>  	hw->ctrl.len <<= offset;
>  
> -	if (is_default_overflow_handler(bp)) {
> +	if (uses_default_overflow_handler(bp)) {
>  		/*
>  		 * Mismatch breakpoints are required for single-stepping
>  		 * breakpoints.
> @@ -798,7 +798,7 @@ static void watchpoint_handler(unsigned long addr, unsigned int fsr,
>  		 * Otherwise, insert a temporary mismatch breakpoint so that
>  		 * we can single-step over the watchpoint trigger.
>  		 */
> -		if (!is_default_overflow_handler(wp))
> +		if (!uses_default_overflow_handler(wp))
>  			continue;
>  step:
>  		enable_single_step(wp, instruction_pointer(regs));
> @@ -811,7 +811,7 @@ static void watchpoint_handler(unsigned long addr, unsigned int fsr,
>  		info->trigger = addr;
>  		pr_debug("watchpoint fired: address = 0x%x\n", info->trigger);
>  		perf_bp_event(wp, regs);
> -		if (is_default_overflow_handler(wp))
> +		if (uses_default_overflow_handler(wp))
>  			enable_single_step(wp, instruction_pointer(regs));
>  	}
>  
> @@ -886,7 +886,7 @@ static void breakpoint_handler(unsigned long unknown, struct pt_regs *regs)
>  			info->trigger = addr;
>  			pr_debug("breakpoint fired: address = 0x%x\n", addr);
>  			perf_bp_event(bp, regs);
> -			if (is_default_overflow_handler(bp))
> +			if (uses_default_overflow_handler(bp))
>  				enable_single_step(bp, addr);
>  			goto unlock;
>  		}
> diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c
> index b29a311bb055..9659a9555c63 100644
> --- a/arch/arm64/kernel/hw_breakpoint.c
> +++ b/arch/arm64/kernel/hw_breakpoint.c
> @@ -654,7 +654,7 @@ static int breakpoint_handler(unsigned long unused, unsigned long esr,
>  		perf_bp_event(bp, regs);
>  
>  		/* Do we need to handle the stepping? */
> -		if (is_default_overflow_handler(bp))
> +		if (uses_default_overflow_handler(bp))
>  			step = 1;
>  unlock:
>  		rcu_read_unlock();
> @@ -733,7 +733,7 @@ static u64 get_distance_from_watchpoint(unsigned long addr, u64 val,
>  static int watchpoint_report(struct perf_event *wp, unsigned long addr,
>  			     struct pt_regs *regs)
>  {
> -	int step = is_default_overflow_handler(wp);
> +	int step = uses_default_overflow_handler(wp);
>  	struct arch_hw_breakpoint *info = counter_arch_bp(wp);
>  
>  	info->trigger = addr;
> diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
> index ee8b9ecdc03b..f174b77437f5 100644
> --- a/include/linux/perf_event.h
> +++ b/include/linux/perf_event.h
> @@ -1105,15 +1105,31 @@ extern int perf_event_output(struct perf_event *event,
>  			     struct pt_regs *regs);
>  
>  static inline bool
> -is_default_overflow_handler(struct perf_event *event)
> +__is_default_overflow_handler(perf_overflow_handler_t overflow_handler)
>  {
> -	if (likely(event->overflow_handler == perf_event_output_forward))
> +	if (likely(overflow_handler == perf_event_output_forward))
>  		return true;
> -	if (unlikely(event->overflow_handler == perf_event_output_backward))
> +	if (unlikely(overflow_handler == perf_event_output_backward))
>  		return true;
>  	return false;
>  }
>  
> +#define is_default_overflow_handler(event) \
> +	__is_default_overflow_handler((event)->overflow_handler)
> +
> +#ifdef CONFIG_BPF_SYSCALL
> +static inline bool uses_default_overflow_handler(struct perf_event *event)
> +{
> +	if (likely(is_default_overflow_handler(event)))
> +		return true;
> +
> +	return __is_default_overflow_handler(event->orig_overflow_handler);
> +}
> +#else
> +#define uses_default_overflow_handler(event) \
> +	is_default_overflow_handler(event)
> +#endif
> +
>  extern void
>  perf_event_header__init_id(struct perf_event_header *header,
>  			   struct perf_sample_data *data,
> -- 
> 2.30.2
>
Catalin Marinas Nov. 15, 2022, 3:09 p.m. UTC | #2
On Fri, Sep 23, 2022 at 01:36:45PM -0700, Tomislav Novak wrote:
> On ARM platforms is_default_overflow_handler() is used to determine if
> hw_breakpoint code should single-step over the watchpoint trigger or
> let the custom handler deal with it.
> 
> Attaching a BPF program to a watchpoint replaces the handler with
> bpf_overflow_handler, which isn't recognized as a default handler so we
> never step over the instruction triggering the data abort exception (the
> watchpoint keeps firing):
> 
>   # bpftrace -e 'watchpoint:0x10000000:4:w { printf("hit\n"); }' ./wp_test
>   Attaching 1 probe...
>   hit
>   hit
>   hit
>   [...]
> 
> (wp_test performs a single 4-byte store to address 0x10000000)
> 
> This patch replaces the check with uses_default_overflow_handler(), which
> accounts for the bpf_overflow_handler() case by also testing if the handler
> invokes one of the perf_event_output functions via orig_default_handler.
> 
> Signed-off-by: Tomislav Novak <tnovak@fb.com>
> Tested-by: Samuel Gosselin <sgosselin@fb.com> # arm64
> ---
>  arch/arm/kernel/hw_breakpoint.c   |  8 ++++----
>  arch/arm64/kernel/hw_breakpoint.c |  4 ++--
>  include/linux/perf_event.h        | 22 +++++++++++++++++++---
>  3 files changed, 25 insertions(+), 9 deletions(-)

It looks like this slipped through the cracks. I'm fine with the patch
but could you split the arm and arm64 parts in separate patches? Unless
rmk acks it and we can take the patch through the arm64 (or perf) tree.
Tomislav Novak Nov. 28, 2022, 11:59 a.m. UTC | #3
On Tue, Nov 15, 2022 at 03:09:37PM +0000, Catalin Marinas wrote:
> > On ARM platforms is_default_overflow_handler() is used to determine if
> > hw_breakpoint code should single-step over the watchpoint trigger or
> > let the custom handler deal with it.
> > 
> > Attaching a BPF program to a watchpoint replaces the handler with
> > bpf_overflow_handler, which isn't recognized as a default handler so we
> > never step over the instruction triggering the data abort exception (the
> > watchpoint keeps firing):
> > 
> >   # bpftrace -e 'watchpoint:0x10000000:4:w { printf("hit\n"); }' ./wp_test
> >   Attaching 1 probe...
> >   hit
> >   hit
> >   hit
> >   [...]
> > 
> > (wp_test performs a single 4-byte store to address 0x10000000)
> > 
> > This patch replaces the check with uses_default_overflow_handler(), which
> > accounts for the bpf_overflow_handler() case by also testing if the handler
> > invokes one of the perf_event_output functions via orig_default_handler.
> > 
> > Signed-off-by: Tomislav Novak <tnovak@fb.com>
> > Tested-by: Samuel Gosselin <sgosselin@fb.com> # arm64
> > ---
> >  arch/arm/kernel/hw_breakpoint.c   |  8 ++++----
> >  arch/arm64/kernel/hw_breakpoint.c |  4 ++--
> >  include/linux/perf_event.h        | 22 +++++++++++++++++++---
> >  3 files changed, 25 insertions(+), 9 deletions(-)
> 
> It looks like this slipped through the cracks. I'm fine with the patch
> but could you split the arm and arm64 parts in separate patches? Unless
> rmk acks it and we can take the patch through the arm64 (or perf) tree.

Thanks for reviewing!

Given the changes in the arch-independent perf_event.h, I think merging it
as a single commit may be easiest (assuming rmk acks it).

Alternatively I could move arm changes into a separate patch, keeping arm64
and perf_event.h in this one (possibly splitting out the latter into its own
commit). One that's merged, the arm patch could be submitted to linux-arm.
What would you prefer?
Catalin Marinas Nov. 30, 2022, 10:51 a.m. UTC | #4
On Mon, Nov 28, 2022 at 11:59:37AM +0000, Tomislav Novak wrote:
> On Tue, Nov 15, 2022 at 03:09:37PM +0000, Catalin Marinas wrote:
> > > On ARM platforms is_default_overflow_handler() is used to determine if
> > > hw_breakpoint code should single-step over the watchpoint trigger or
> > > let the custom handler deal with it.
> > > 
> > > Attaching a BPF program to a watchpoint replaces the handler with
> > > bpf_overflow_handler, which isn't recognized as a default handler so we
> > > never step over the instruction triggering the data abort exception (the
> > > watchpoint keeps firing):
> > > 
> > >   # bpftrace -e 'watchpoint:0x10000000:4:w { printf("hit\n"); }' ./wp_test
> > >   Attaching 1 probe...
> > >   hit
> > >   hit
> > >   hit
> > >   [...]
> > > 
> > > (wp_test performs a single 4-byte store to address 0x10000000)
> > > 
> > > This patch replaces the check with uses_default_overflow_handler(), which
> > > accounts for the bpf_overflow_handler() case by also testing if the handler
> > > invokes one of the perf_event_output functions via orig_default_handler.
> > > 
> > > Signed-off-by: Tomislav Novak <tnovak@fb.com>
> > > Tested-by: Samuel Gosselin <sgosselin@fb.com> # arm64
> > > ---
> > >  arch/arm/kernel/hw_breakpoint.c   |  8 ++++----
> > >  arch/arm64/kernel/hw_breakpoint.c |  4 ++--
> > >  include/linux/perf_event.h        | 22 +++++++++++++++++++---
> > >  3 files changed, 25 insertions(+), 9 deletions(-)
> > 
> > It looks like this slipped through the cracks. I'm fine with the patch
> > but could you split the arm and arm64 parts in separate patches? Unless
> > rmk acks it and we can take the patch through the arm64 (or perf) tree.
> 
> Thanks for reviewing!
> 
> Given the changes in the arch-independent perf_event.h, I think merging it
> as a single commit may be easiest (assuming rmk acks it).
> 
> Alternatively I could move arm changes into a separate patch, keeping arm64
> and perf_event.h in this one (possibly splitting out the latter into its own
> commit). One that's merged, the arm patch could be submitted to linux-arm.
> What would you prefer?

Actually, arch/arm*/kernel/hw_breakpoint.c come under the ARM PMU
profiling, so no need to split the patch. It may need an ack from the
generic perf maintainers for include/linux/perf.h.

FWIW,

Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Tomislav Novak Nov. 30, 2022, 12:08 p.m. UTC | #5
On Wed, Nov 30, 2022 at 10:51:56AM +0000, Catalin Marinas wrote:
> On Mon, Nov 28, 2022 at 11:59:37AM +0000, Tomislav Novak wrote:
> > On Tue, Nov 15, 2022 at 03:09:37PM +0000, Catalin Marinas wrote:
> > > > On ARM platforms is_default_overflow_handler() is used to determine if
> > > > hw_breakpoint code should single-step over the watchpoint trigger or
> > > > let the custom handler deal with it.
> > > > 
> > > > Attaching a BPF program to a watchpoint replaces the handler with
> > > > bpf_overflow_handler, which isn't recognized as a default handler so we
> > > > never step over the instruction triggering the data abort exception (the
> > > > watchpoint keeps firing):
> > > > 
> > > >   # bpftrace -e 'watchpoint:0x10000000:4:w { printf("hit\n"); }' ./wp_test
> > > >   Attaching 1 probe...
> > > >   hit
> > > >   hit
> > > >   hit
> > > >   [...]
> > > > 
> > > > (wp_test performs a single 4-byte store to address 0x10000000)
> > > > 
> > > > This patch replaces the check with uses_default_overflow_handler(), which
> > > > accounts for the bpf_overflow_handler() case by also testing if the handler
> > > > invokes one of the perf_event_output functions via orig_default_handler.
> > > > 
> > > > Signed-off-by: Tomislav Novak <tnovak@fb.com>
> > > > Tested-by: Samuel Gosselin <sgosselin@fb.com> # arm64
> > > > ---
> > > >  arch/arm/kernel/hw_breakpoint.c   |  8 ++++----
> > > >  arch/arm64/kernel/hw_breakpoint.c |  4 ++--
> > > >  include/linux/perf_event.h        | 22 +++++++++++++++++++---
> > > >  3 files changed, 25 insertions(+), 9 deletions(-)
> > > 
> > > It looks like this slipped through the cracks. I'm fine with the patch
> > > but could you split the arm and arm64 parts in separate patches? Unless
> > > rmk acks it and we can take the patch through the arm64 (or perf) tree.
> > 
> > Thanks for reviewing!
> > 
> > Given the changes in the arch-independent perf_event.h, I think merging it
> > as a single commit may be easiest (assuming rmk acks it).
> > 
> > Alternatively I could move arm changes into a separate patch, keeping arm64
> > and perf_event.h in this one (possibly splitting out the latter into its own
> > commit). One that's merged, the arm patch could be submitted to linux-arm.
> > What would you prefer?
> 
> Actually, arch/arm*/kernel/hw_breakpoint.c come under the ARM PMU
> profiling, so no need to split the patch. It may need an ack from the
> generic perf maintainers for include/linux/perf.h.

Good point! I realized I've neglected to CC perf_event maintainers (sorry!),
doing so now.
diff mbox series

Patch

diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c
index 054e9199f30d..dc0fb7a81371 100644
--- a/arch/arm/kernel/hw_breakpoint.c
+++ b/arch/arm/kernel/hw_breakpoint.c
@@ -626,7 +626,7 @@  int hw_breakpoint_arch_parse(struct perf_event *bp,
 	hw->address &= ~alignment_mask;
 	hw->ctrl.len <<= offset;
 
-	if (is_default_overflow_handler(bp)) {
+	if (uses_default_overflow_handler(bp)) {
 		/*
 		 * Mismatch breakpoints are required for single-stepping
 		 * breakpoints.
@@ -798,7 +798,7 @@  static void watchpoint_handler(unsigned long addr, unsigned int fsr,
 		 * Otherwise, insert a temporary mismatch breakpoint so that
 		 * we can single-step over the watchpoint trigger.
 		 */
-		if (!is_default_overflow_handler(wp))
+		if (!uses_default_overflow_handler(wp))
 			continue;
 step:
 		enable_single_step(wp, instruction_pointer(regs));
@@ -811,7 +811,7 @@  static void watchpoint_handler(unsigned long addr, unsigned int fsr,
 		info->trigger = addr;
 		pr_debug("watchpoint fired: address = 0x%x\n", info->trigger);
 		perf_bp_event(wp, regs);
-		if (is_default_overflow_handler(wp))
+		if (uses_default_overflow_handler(wp))
 			enable_single_step(wp, instruction_pointer(regs));
 	}
 
@@ -886,7 +886,7 @@  static void breakpoint_handler(unsigned long unknown, struct pt_regs *regs)
 			info->trigger = addr;
 			pr_debug("breakpoint fired: address = 0x%x\n", addr);
 			perf_bp_event(bp, regs);
-			if (is_default_overflow_handler(bp))
+			if (uses_default_overflow_handler(bp))
 				enable_single_step(bp, addr);
 			goto unlock;
 		}
diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c
index b29a311bb055..9659a9555c63 100644
--- a/arch/arm64/kernel/hw_breakpoint.c
+++ b/arch/arm64/kernel/hw_breakpoint.c
@@ -654,7 +654,7 @@  static int breakpoint_handler(unsigned long unused, unsigned long esr,
 		perf_bp_event(bp, regs);
 
 		/* Do we need to handle the stepping? */
-		if (is_default_overflow_handler(bp))
+		if (uses_default_overflow_handler(bp))
 			step = 1;
 unlock:
 		rcu_read_unlock();
@@ -733,7 +733,7 @@  static u64 get_distance_from_watchpoint(unsigned long addr, u64 val,
 static int watchpoint_report(struct perf_event *wp, unsigned long addr,
 			     struct pt_regs *regs)
 {
-	int step = is_default_overflow_handler(wp);
+	int step = uses_default_overflow_handler(wp);
 	struct arch_hw_breakpoint *info = counter_arch_bp(wp);
 
 	info->trigger = addr;
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index ee8b9ecdc03b..f174b77437f5 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1105,15 +1105,31 @@  extern int perf_event_output(struct perf_event *event,
 			     struct pt_regs *regs);
 
 static inline bool
-is_default_overflow_handler(struct perf_event *event)
+__is_default_overflow_handler(perf_overflow_handler_t overflow_handler)
 {
-	if (likely(event->overflow_handler == perf_event_output_forward))
+	if (likely(overflow_handler == perf_event_output_forward))
 		return true;
-	if (unlikely(event->overflow_handler == perf_event_output_backward))
+	if (unlikely(overflow_handler == perf_event_output_backward))
 		return true;
 	return false;
 }
 
+#define is_default_overflow_handler(event) \
+	__is_default_overflow_handler((event)->overflow_handler)
+
+#ifdef CONFIG_BPF_SYSCALL
+static inline bool uses_default_overflow_handler(struct perf_event *event)
+{
+	if (likely(is_default_overflow_handler(event)))
+		return true;
+
+	return __is_default_overflow_handler(event->orig_overflow_handler);
+}
+#else
+#define uses_default_overflow_handler(event) \
+	is_default_overflow_handler(event)
+#endif
+
 extern void
 perf_event_header__init_id(struct perf_event_header *header,
 			   struct perf_sample_data *data,