ARM: hw_breakpoint: Handle inexact watchpoint addresses
diff mbox series

Message ID 20191019111216.1.I82eae759ca6dc28a245b043f485ca490e3015321@changeid
State New
Headers show
Series
  • ARM: hw_breakpoint: Handle inexact watchpoint addresses
Related show

Commit Message

Doug Anderson Oct. 19, 2019, 6:12 p.m. UTC
This is commit fdfeff0f9e3d ("arm64: hw_breakpoint: Handle inexact
watchpoint addresses") but ported to arm32, which has the same
problem.

This problem was found by Android CTS tests, notably the
"watchpoint_imprecise" test [1].  I tested locally against a copycat
(simplified) version of the test though.

[1] https://android.googlesource.com/platform/bionic/+/master/tests/sys_ptrace_test.cpp

Signed-off-by: Douglas Anderson <dianders@chromium.org>
---

 arch/arm/kernel/hw_breakpoint.c | 96 ++++++++++++++++++++++++---------
 1 file changed, 70 insertions(+), 26 deletions(-)

Comments

Matthias Kaehlcke Oct. 21, 2019, 6:46 p.m. UTC | #1
On Sat, Oct 19, 2019 at 11:12:26AM -0700, Douglas Anderson wrote:
> This is commit fdfeff0f9e3d ("arm64: hw_breakpoint: Handle inexact
> watchpoint addresses") but ported to arm32, which has the same
> problem.
> 
> This problem was found by Android CTS tests, notably the
> "watchpoint_imprecise" test [1].  I tested locally against a copycat
> (simplified) version of the test though.
> 
> [1] https://android.googlesource.com/platform/bionic/+/master/tests/sys_ptrace_test.cpp
> 
> Signed-off-by: Douglas Anderson <dianders@chromium.org>
> ---
> 
>  arch/arm/kernel/hw_breakpoint.c | 96 ++++++++++++++++++++++++---------
>  1 file changed, 70 insertions(+), 26 deletions(-)
> 
> diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c
> index b0c195e3a06d..d394878409db 100644
> --- a/arch/arm/kernel/hw_breakpoint.c
> +++ b/arch/arm/kernel/hw_breakpoint.c
> @@ -680,26 +680,62 @@ static void disable_single_step(struct perf_event *bp)
>  	arch_install_hw_breakpoint(bp);
>  }
>  
> +/*
> + * Arm32 hardware does not always report a watchpoint hit address that matches
> + * one of the watchpoints set. It can also report an address "near" the
> + * watchpoint if a single instruction access both watched and unwatched
> + * addresses. There is no straight-forward way, short of disassembling the
> + * offending instruction, to map that address back to the watchpoint. This
> + * function computes the distance of the memory access from the watchpoint as a
> + * heuristic for the likelyhood that a given access triggered the watchpoint.
> + *
> + * See this same function in the arm64 platform code, which has the same
> + * problem.
> + *
> + * The function returns the distance of the address from the bytes watched by
> + * the watchpoint. In case of an exact match, it returns 0.
> + */
> +static u32 get_distance_from_watchpoint(unsigned long addr, u32 val,
> +					struct arch_hw_breakpoint_ctrl *ctrl)
> +{
> +	u32 wp_low, wp_high;
> +	u32 lens, lene;
> +
> +	lens = __ffs(ctrl->len);

Doesn't this always end up with 'lens == 0'? IIUC ctrl->len can have
the values ARM_BREAKPOINT_LEN_{1,2,4,8}:

#define ARM_BREAKPOINT_LEN_1	0x1
#define ARM_BREAKPOINT_LEN_2	0x3
#define ARM_BREAKPOINT_LEN_4	0xf
#define ARM_BREAKPOINT_LEN_8	0xff

> +	lene = __fls(ctrl->len);
> +
> +	wp_low = val + lens;
> +	wp_high = val + lene;

First I thought these values are off by one, but in difference to
ffs() from glibc the kernel functions start with index 0, instead
of using zero as 'no bit set'.

> +	if (addr < wp_low)
> +		return wp_low - addr;
> +	else if (addr > wp_high)
> +		return addr - wp_high;
> +	else
> +		return 0;
> +}
> +
>  static void watchpoint_handler(unsigned long addr, unsigned int fsr,
>  			       struct pt_regs *regs)
>  {
> -	int i, access;
> -	u32 val, ctrl_reg, alignment_mask;
> +	int i, access, closest_match = 0;
> +	u32 min_dist = -1, dist;
> +	u32 val, ctrl_reg;
>  	struct perf_event *wp, **slots;
>  	struct arch_hw_breakpoint *info;
>  	struct arch_hw_breakpoint_ctrl ctrl;
>  
>  	slots = this_cpu_ptr(wp_on_reg);
>  
> +	/*
> +	 * Find all watchpoints that match the reported address. If no exact
> +	 * match is found. Attribute the hit to the closest watchpoint.
> +	 */
> +	rcu_read_lock();
>  	for (i = 0; i < core_num_wrps; ++i) {
> -		rcu_read_lock();
> -
>  		wp = slots[i];
> -
>  		if (wp == NULL)
> -			goto unlock;
> +			continue;
>  
> -		info = counter_arch_bp(wp);
>  		/*
>  		 * The DFAR is an unknown value on debug architectures prior
>  		 * to 7.1. Since we only allow a single watchpoint on these
> @@ -708,33 +744,31 @@ static void watchpoint_handler(unsigned long addr, unsigned int fsr,
>  		 */
>  		if (debug_arch < ARM_DEBUG_ARCH_V7_1) {
>  			BUG_ON(i > 0);
> +			info = counter_arch_bp(wp);
>  			info->trigger = wp->attr.bp_addr;
>  		} else {
> -			if (info->ctrl.len == ARM_BREAKPOINT_LEN_8)
> -				alignment_mask = 0x7;
> -			else
> -				alignment_mask = 0x3;
> -
> -			/* Check if the watchpoint value matches. */
> -			val = read_wb_reg(ARM_BASE_WVR + i);
> -			if (val != (addr & ~alignment_mask))
> -				goto unlock;
> -
> -			/* Possible match, check the byte address select. */
> -			ctrl_reg = read_wb_reg(ARM_BASE_WCR + i);
> -			decode_ctrl_reg(ctrl_reg, &ctrl);
> -			if (!((1 << (addr & alignment_mask)) & ctrl.len))
> -				goto unlock;
> -
>  			/* Check that the access type matches. */
>  			if (debug_exception_updates_fsr()) {
>  				access = (fsr & ARM_FSR_ACCESS_MASK) ?
>  					  HW_BREAKPOINT_W : HW_BREAKPOINT_R;
>  				if (!(access & hw_breakpoint_type(wp)))
> -					goto unlock;
> +					continue;
>  			}
>  
> +			val = read_wb_reg(ARM_BASE_WVR + i);
> +			ctrl_reg = read_wb_reg(ARM_BASE_WCR + i);
> +			decode_ctrl_reg(ctrl_reg, &ctrl);
> +			dist = get_distance_from_watchpoint(addr, val, &ctrl);
> +			if (dist < min_dist) {
> +				min_dist = dist;
> +				closest_match = i;
> +			}
> +			/* Is this an exact match? */
> +			if (dist != 0)
> +				continue;
> +
>  			/* We have a winner. */
> +			info = counter_arch_bp(wp);
>  			info->trigger = addr;

Unless we care about using the 'last' watchpoint in case multiple WPs have
the same address I think it would be clearer to change the above to:

	       	       	if (dist == 0) {
				/* We have a winner. */
				info = counter_arch_bp(wp);
				info->trigger = addr;
				break;
			}

>  		}
>  
> @@ -748,10 +782,20 @@ static void watchpoint_handler(unsigned long addr, unsigned int fsr,
>  		 */
>  		if (is_default_overflow_handler(wp))
>  			enable_single_step(wp, instruction_pointer(regs));
> +	}
>  
> -unlock:
> -		rcu_read_unlock();
> +	if (min_dist > 0 && min_dist != -1) {
> +		/* No exact match found. */
> +		wp = slots[closest_match];
> +		info = counter_arch_bp(wp);
> +		info->trigger = addr;
> +		pr_debug("watchpoint fired: address = 0x%x\n", info->trigger);
> +		perf_bp_event(wp, regs);
> +		if (is_default_overflow_handler(wp))
> +			enable_single_step(wp, instruction_pointer(regs));
>  	}
> +
> +	rcu_read_unlock();
>  }
>  
>  static void watchpoint_single_step_handler(unsigned long pc)
> -- 
> 2.23.0.866.gb869b98d4c-goog
>
Doug Anderson Oct. 21, 2019, 11:49 p.m. UTC | #2
Hi,

On Mon, Oct 21, 2019 at 11:47 AM Matthias Kaehlcke <mka@chromium.org> wrote:
>
> On Sat, Oct 19, 2019 at 11:12:26AM -0700, Douglas Anderson wrote:
> > This is commit fdfeff0f9e3d ("arm64: hw_breakpoint: Handle inexact
> > watchpoint addresses") but ported to arm32, which has the same
> > problem.
> >
> > This problem was found by Android CTS tests, notably the
> > "watchpoint_imprecise" test [1].  I tested locally against a copycat
> > (simplified) version of the test though.
> >
> > [1] https://android.googlesource.com/platform/bionic/+/master/tests/sys_ptrace_test.cpp
> >
> > Signed-off-by: Douglas Anderson <dianders@chromium.org>
> > ---
> >
> >  arch/arm/kernel/hw_breakpoint.c | 96 ++++++++++++++++++++++++---------
> >  1 file changed, 70 insertions(+), 26 deletions(-)
> >
> > diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c
> > index b0c195e3a06d..d394878409db 100644
> > --- a/arch/arm/kernel/hw_breakpoint.c
> > +++ b/arch/arm/kernel/hw_breakpoint.c
> > @@ -680,26 +680,62 @@ static void disable_single_step(struct perf_event *bp)
> >       arch_install_hw_breakpoint(bp);
> >  }
> >
> > +/*
> > + * Arm32 hardware does not always report a watchpoint hit address that matches
> > + * one of the watchpoints set. It can also report an address "near" the
> > + * watchpoint if a single instruction access both watched and unwatched
> > + * addresses. There is no straight-forward way, short of disassembling the
> > + * offending instruction, to map that address back to the watchpoint. This
> > + * function computes the distance of the memory access from the watchpoint as a
> > + * heuristic for the likelyhood that a given access triggered the watchpoint.
> > + *
> > + * See this same function in the arm64 platform code, which has the same
> > + * problem.
> > + *
> > + * The function returns the distance of the address from the bytes watched by
> > + * the watchpoint. In case of an exact match, it returns 0.
> > + */
> > +static u32 get_distance_from_watchpoint(unsigned long addr, u32 val,
> > +                                     struct arch_hw_breakpoint_ctrl *ctrl)
> > +{
> > +     u32 wp_low, wp_high;
> > +     u32 lens, lene;
> > +
> > +     lens = __ffs(ctrl->len);
>
> Doesn't this always end up with 'lens == 0'? IIUC ctrl->len can have
> the values ARM_BREAKPOINT_LEN_{1,2,4,8}:
>
> #define ARM_BREAKPOINT_LEN_1    0x1
> #define ARM_BREAKPOINT_LEN_2    0x3
> #define ARM_BREAKPOINT_LEN_4    0xf
> #define ARM_BREAKPOINT_LEN_8    0xff

Yes, but my best guess without digging into the ARM ARM is that the
underlying hardware is more flexible.  I don't think it hurts to
support the flexibility here even if the code creating the breakpoint
never creates one line that.  ...especially since it makes the arm32
and arm64 code match in this way.


> > +     lene = __fls(ctrl->len);
> > +
> > +     wp_low = val + lens;
> > +     wp_high = val + lene;
>
> First I thought these values are off by one, but in difference to
> ffs() from glibc the kernel functions start with index 0, instead
> of using zero as 'no bit set'.

Yes, this took me a while.  If you look at the original commit
fdfeff0f9e3d ("arm64: hw_breakpoint: Handle inexact watchpoint
addresses") this was clearly done on purpose though.  Specifically
note the part of the commit message:

[will: use __ffs instead of ffs - 1]


> > +     if (addr < wp_low)
> > +             return wp_low - addr;
> > +     else if (addr > wp_high)
> > +             return addr - wp_high;
> > +     else
> > +             return 0;
> > +}
> > +
> >  static void watchpoint_handler(unsigned long addr, unsigned int fsr,
> >                              struct pt_regs *regs)
> >  {
> > -     int i, access;
> > -     u32 val, ctrl_reg, alignment_mask;
> > +     int i, access, closest_match = 0;
> > +     u32 min_dist = -1, dist;
> > +     u32 val, ctrl_reg;
> >       struct perf_event *wp, **slots;
> >       struct arch_hw_breakpoint *info;
> >       struct arch_hw_breakpoint_ctrl ctrl;
> >
> >       slots = this_cpu_ptr(wp_on_reg);
> >
> > +     /*
> > +      * Find all watchpoints that match the reported address. If no exact
> > +      * match is found. Attribute the hit to the closest watchpoint.
> > +      */
> > +     rcu_read_lock();
> >       for (i = 0; i < core_num_wrps; ++i) {
> > -             rcu_read_lock();
> > -
> >               wp = slots[i];
> > -
> >               if (wp == NULL)
> > -                     goto unlock;
> > +                     continue;
> >
> > -             info = counter_arch_bp(wp);
> >               /*
> >                * The DFAR is an unknown value on debug architectures prior
> >                * to 7.1. Since we only allow a single watchpoint on these
> > @@ -708,33 +744,31 @@ static void watchpoint_handler(unsigned long addr, unsigned int fsr,
> >                */
> >               if (debug_arch < ARM_DEBUG_ARCH_V7_1) {
> >                       BUG_ON(i > 0);
> > +                     info = counter_arch_bp(wp);
> >                       info->trigger = wp->attr.bp_addr;
> >               } else {
> > -                     if (info->ctrl.len == ARM_BREAKPOINT_LEN_8)
> > -                             alignment_mask = 0x7;
> > -                     else
> > -                             alignment_mask = 0x3;
> > -
> > -                     /* Check if the watchpoint value matches. */
> > -                     val = read_wb_reg(ARM_BASE_WVR + i);
> > -                     if (val != (addr & ~alignment_mask))
> > -                             goto unlock;
> > -
> > -                     /* Possible match, check the byte address select. */
> > -                     ctrl_reg = read_wb_reg(ARM_BASE_WCR + i);
> > -                     decode_ctrl_reg(ctrl_reg, &ctrl);
> > -                     if (!((1 << (addr & alignment_mask)) & ctrl.len))
> > -                             goto unlock;
> > -
> >                       /* Check that the access type matches. */
> >                       if (debug_exception_updates_fsr()) {
> >                               access = (fsr & ARM_FSR_ACCESS_MASK) ?
> >                                         HW_BREAKPOINT_W : HW_BREAKPOINT_R;
> >                               if (!(access & hw_breakpoint_type(wp)))
> > -                                     goto unlock;
> > +                                     continue;
> >                       }
> >
> > +                     val = read_wb_reg(ARM_BASE_WVR + i);
> > +                     ctrl_reg = read_wb_reg(ARM_BASE_WCR + i);
> > +                     decode_ctrl_reg(ctrl_reg, &ctrl);
> > +                     dist = get_distance_from_watchpoint(addr, val, &ctrl);
> > +                     if (dist < min_dist) {
> > +                             min_dist = dist;
> > +                             closest_match = i;
> > +                     }
> > +                     /* Is this an exact match? */
> > +                     if (dist != 0)
> > +                             continue;
> > +
> >                       /* We have a winner. */
> > +                     info = counter_arch_bp(wp);
> >                       info->trigger = addr;
>
> Unless we care about using the 'last' watchpoint in case multiple WPs have
> the same address I think it would be clearer to change the above to:
>
>                         if (dist == 0) {
>                                 /* We have a winner. */
>                                 info = counter_arch_bp(wp);
>                                 info->trigger = addr;
>                                 break;
>                         }

Without being an expert on the Hardware Breakpoint API, my
understanding (based on how the old arm32 code worked and how the
existing arm64 code works) is that the API accounts for the fact that
more than one watchpoint can trigger and that we should report on all
of them.

Specifically if you do:

watch 1 byte at 0x1000
watch 1 byte at 0x1003

...and then someone does a single 4-byte write at 0x1000 then both
watchpoints should trigger.  If we do a "break" here then they won't
both trigger.  Also note that the triggering happens below in the
"perf_bp_event(wp, regs)" so with your break I think you'll miss it,
no?

That being said, with my patch we still won't do exactly the right
thing that for an "imprecise" watchpoint.  Specifically if you do:

watch 1 byte at 0x1008
watch 1 byte at 0x100b
write 16 bytes at 0x1000

...then we will _only_ trigger the 0x1008 watchpoint.  ...but that's
the limitation in how the breakpoints work.  You can see this is what
happens because the imprecise stuff is outside the for loop and only
triggers when nothing else did.


-Doug
Matthias Kaehlcke Oct. 22, 2019, 4:39 p.m. UTC | #3
On Mon, Oct 21, 2019 at 04:49:48PM -0700, Doug Anderson wrote:
> Hi,
> 
> On Mon, Oct 21, 2019 at 11:47 AM Matthias Kaehlcke <mka@chromium.org> wrote:
> >
> > On Sat, Oct 19, 2019 at 11:12:26AM -0700, Douglas Anderson wrote:
> > > This is commit fdfeff0f9e3d ("arm64: hw_breakpoint: Handle inexact
> > > watchpoint addresses") but ported to arm32, which has the same
> > > problem.
> > >
> > > This problem was found by Android CTS tests, notably the
> > > "watchpoint_imprecise" test [1].  I tested locally against a copycat
> > > (simplified) version of the test though.
> > >
> > > [1] https://android.googlesource.com/platform/bionic/+/master/tests/sys_ptrace_test.cpp
> > >
> > > Signed-off-by: Douglas Anderson <dianders@chromium.org>
> > > ---
> > >
> > >  arch/arm/kernel/hw_breakpoint.c | 96 ++++++++++++++++++++++++---------
> > >  1 file changed, 70 insertions(+), 26 deletions(-)
> > >
> > > diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c
> > > index b0c195e3a06d..d394878409db 100644
> > > --- a/arch/arm/kernel/hw_breakpoint.c
> > > +++ b/arch/arm/kernel/hw_breakpoint.c
> > > @@ -680,26 +680,62 @@ static void disable_single_step(struct perf_event *bp)
> > >       arch_install_hw_breakpoint(bp);
> > >  }
> > >
> > > +/*
> > > + * Arm32 hardware does not always report a watchpoint hit address that matches
> > > + * one of the watchpoints set. It can also report an address "near" the
> > > + * watchpoint if a single instruction access both watched and unwatched
> > > + * addresses. There is no straight-forward way, short of disassembling the
> > > + * offending instruction, to map that address back to the watchpoint. This
> > > + * function computes the distance of the memory access from the watchpoint as a
> > > + * heuristic for the likelyhood that a given access triggered the watchpoint.
> > > + *
> > > + * See this same function in the arm64 platform code, which has the same
> > > + * problem.
> > > + *
> > > + * The function returns the distance of the address from the bytes watched by
> > > + * the watchpoint. In case of an exact match, it returns 0.
> > > + */
> > > +static u32 get_distance_from_watchpoint(unsigned long addr, u32 val,
> > > +                                     struct arch_hw_breakpoint_ctrl *ctrl)
> > > +{
> > > +     u32 wp_low, wp_high;
> > > +     u32 lens, lene;
> > > +
> > > +     lens = __ffs(ctrl->len);
> >
> > Doesn't this always end up with 'lens == 0'? IIUC ctrl->len can have
> > the values ARM_BREAKPOINT_LEN_{1,2,4,8}:
> >
> > #define ARM_BREAKPOINT_LEN_1    0x1
> > #define ARM_BREAKPOINT_LEN_2    0x3
> > #define ARM_BREAKPOINT_LEN_4    0xf
> > #define ARM_BREAKPOINT_LEN_8    0xff
> 
> Yes, but my best guess without digging into the ARM ARM is that the
> underlying hardware is more flexible.  I don't think it hurts to
> support the flexibility here even if the code creating the breakpoint
> never creates one line that.  ...especially since it makes the arm32
> and arm64 code match in this way.

ok

> > > +     lene = __fls(ctrl->len);
> > > +
> > > +     wp_low = val + lens;
> > > +     wp_high = val + lene;
> >
> > First I thought these values are off by one, but in difference to
> > ffs() from glibc the kernel functions start with index 0, instead
> > of using zero as 'no bit set'.
> 
> Yes, this took me a while.  If you look at the original commit
> fdfeff0f9e3d ("arm64: hw_breakpoint: Handle inexact watchpoint
> addresses") this was clearly done on purpose though.  Specifically
> note the part of the commit message:
> 
> [will: use __ffs instead of ffs - 1]
> 
> 
> > > +     if (addr < wp_low)
> > > +             return wp_low - addr;
> > > +     else if (addr > wp_high)
> > > +             return addr - wp_high;
> > > +     else
> > > +             return 0;
> > > +}
> > > +
> > >  static void watchpoint_handler(unsigned long addr, unsigned int fsr,
> > >                              struct pt_regs *regs)
> > >  {
> > > -     int i, access;
> > > -     u32 val, ctrl_reg, alignment_mask;
> > > +     int i, access, closest_match = 0;
> > > +     u32 min_dist = -1, dist;
> > > +     u32 val, ctrl_reg;
> > >       struct perf_event *wp, **slots;
> > >       struct arch_hw_breakpoint *info;
> > >       struct arch_hw_breakpoint_ctrl ctrl;
> > >
> > >       slots = this_cpu_ptr(wp_on_reg);
> > >
> > > +     /*
> > > +      * Find all watchpoints that match the reported address. If no exact
> > > +      * match is found. Attribute the hit to the closest watchpoint.
> > > +      */
> > > +     rcu_read_lock();
> > >       for (i = 0; i < core_num_wrps; ++i) {
> > > -             rcu_read_lock();
> > > -
> > >               wp = slots[i];
> > > -
> > >               if (wp == NULL)
> > > -                     goto unlock;
> > > +                     continue;
> > >
> > > -             info = counter_arch_bp(wp);
> > >               /*
> > >                * The DFAR is an unknown value on debug architectures prior
> > >                * to 7.1. Since we only allow a single watchpoint on these
> > > @@ -708,33 +744,31 @@ static void watchpoint_handler(unsigned long addr, unsigned int fsr,
> > >                */
> > >               if (debug_arch < ARM_DEBUG_ARCH_V7_1) {
> > >                       BUG_ON(i > 0);
> > > +                     info = counter_arch_bp(wp);
> > >                       info->trigger = wp->attr.bp_addr;
> > >               } else {
> > > -                     if (info->ctrl.len == ARM_BREAKPOINT_LEN_8)
> > > -                             alignment_mask = 0x7;
> > > -                     else
> > > -                             alignment_mask = 0x3;
> > > -
> > > -                     /* Check if the watchpoint value matches. */
> > > -                     val = read_wb_reg(ARM_BASE_WVR + i);
> > > -                     if (val != (addr & ~alignment_mask))
> > > -                             goto unlock;
> > > -
> > > -                     /* Possible match, check the byte address select. */
> > > -                     ctrl_reg = read_wb_reg(ARM_BASE_WCR + i);
> > > -                     decode_ctrl_reg(ctrl_reg, &ctrl);
> > > -                     if (!((1 << (addr & alignment_mask)) & ctrl.len))
> > > -                             goto unlock;
> > > -
> > >                       /* Check that the access type matches. */
> > >                       if (debug_exception_updates_fsr()) {
> > >                               access = (fsr & ARM_FSR_ACCESS_MASK) ?
> > >                                         HW_BREAKPOINT_W : HW_BREAKPOINT_R;
> > >                               if (!(access & hw_breakpoint_type(wp)))
> > > -                                     goto unlock;
> > > +                                     continue;
> > >                       }
> > >
> > > +                     val = read_wb_reg(ARM_BASE_WVR + i);
> > > +                     ctrl_reg = read_wb_reg(ARM_BASE_WCR + i);
> > > +                     decode_ctrl_reg(ctrl_reg, &ctrl);
> > > +                     dist = get_distance_from_watchpoint(addr, val, &ctrl);
> > > +                     if (dist < min_dist) {
> > > +                             min_dist = dist;
> > > +                             closest_match = i;
> > > +                     }
> > > +                     /* Is this an exact match? */
> > > +                     if (dist != 0)
> > > +                             continue;
> > > +
> > >                       /* We have a winner. */
> > > +                     info = counter_arch_bp(wp);
> > >                       info->trigger = addr;
> >
> > Unless we care about using the 'last' watchpoint in case multiple WPs have
> > the same address I think it would be clearer to change the above to:
> >
> >                         if (dist == 0) {
> >                                 /* We have a winner. */
> >                                 info = counter_arch_bp(wp);
> >                                 info->trigger = addr;
> >                                 break;
> >                         }
> 
> Without being an expert on the Hardware Breakpoint API, my
> understanding (based on how the old arm32 code worked and how the
> existing arm64 code works) is that the API accounts for the fact that
> more than one watchpoint can trigger and that we should report on all
> of them.
> 
> Specifically if you do:
> 
> watch 1 byte at 0x1000
> watch 1 byte at 0x1003
> 
> ...and then someone does a single 4-byte write at 0x1000 then both
> watchpoints should trigger.  If we do a "break" here then they won't
> both trigger.

Makes sense, thanks for the example!

> Also note that the triggering happens below in the "perf_bp_event(wp, regs)"
> so with your break I think you'll miss it, no?

You are right, I put that mentally after the loop, we definitely don't
want to skip it :)

> That being said, with my patch we still won't do exactly the right
> thing that for an "imprecise" watchpoint.  Specifically if you do:
> 
> watch 1 byte at 0x1008
> watch 1 byte at 0x100b
> write 16 bytes at 0x1000
> 
> ...then we will _only_ trigger the 0x1008 watchpoint.  ...but that's
> the limitation in how the breakpoints work.  You can see this is what
> happens because the imprecise stuff is outside the for loop and only
> triggers when nothing else did.

It's still an improvement from not triggering at all :)

Not that I'm an expert in this area, but the change looks good to me, so:

Reviewed-by: Matthias Kaehlcke <mka@chromium.org>

Patch
diff mbox series

diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c
index b0c195e3a06d..d394878409db 100644
--- a/arch/arm/kernel/hw_breakpoint.c
+++ b/arch/arm/kernel/hw_breakpoint.c
@@ -680,26 +680,62 @@  static void disable_single_step(struct perf_event *bp)
 	arch_install_hw_breakpoint(bp);
 }
 
+/*
+ * Arm32 hardware does not always report a watchpoint hit address that matches
+ * one of the watchpoints set. It can also report an address "near" the
+ * watchpoint if a single instruction access both watched and unwatched
+ * addresses. There is no straight-forward way, short of disassembling the
+ * offending instruction, to map that address back to the watchpoint. This
+ * function computes the distance of the memory access from the watchpoint as a
+ * heuristic for the likelyhood that a given access triggered the watchpoint.
+ *
+ * See this same function in the arm64 platform code, which has the same
+ * problem.
+ *
+ * The function returns the distance of the address from the bytes watched by
+ * the watchpoint. In case of an exact match, it returns 0.
+ */
+static u32 get_distance_from_watchpoint(unsigned long addr, u32 val,
+					struct arch_hw_breakpoint_ctrl *ctrl)
+{
+	u32 wp_low, wp_high;
+	u32 lens, lene;
+
+	lens = __ffs(ctrl->len);
+	lene = __fls(ctrl->len);
+
+	wp_low = val + lens;
+	wp_high = val + lene;
+	if (addr < wp_low)
+		return wp_low - addr;
+	else if (addr > wp_high)
+		return addr - wp_high;
+	else
+		return 0;
+}
+
 static void watchpoint_handler(unsigned long addr, unsigned int fsr,
 			       struct pt_regs *regs)
 {
-	int i, access;
-	u32 val, ctrl_reg, alignment_mask;
+	int i, access, closest_match = 0;
+	u32 min_dist = -1, dist;
+	u32 val, ctrl_reg;
 	struct perf_event *wp, **slots;
 	struct arch_hw_breakpoint *info;
 	struct arch_hw_breakpoint_ctrl ctrl;
 
 	slots = this_cpu_ptr(wp_on_reg);
 
+	/*
+	 * Find all watchpoints that match the reported address. If no exact
+	 * match is found. Attribute the hit to the closest watchpoint.
+	 */
+	rcu_read_lock();
 	for (i = 0; i < core_num_wrps; ++i) {
-		rcu_read_lock();
-
 		wp = slots[i];
-
 		if (wp == NULL)
-			goto unlock;
+			continue;
 
-		info = counter_arch_bp(wp);
 		/*
 		 * The DFAR is an unknown value on debug architectures prior
 		 * to 7.1. Since we only allow a single watchpoint on these
@@ -708,33 +744,31 @@  static void watchpoint_handler(unsigned long addr, unsigned int fsr,
 		 */
 		if (debug_arch < ARM_DEBUG_ARCH_V7_1) {
 			BUG_ON(i > 0);
+			info = counter_arch_bp(wp);
 			info->trigger = wp->attr.bp_addr;
 		} else {
-			if (info->ctrl.len == ARM_BREAKPOINT_LEN_8)
-				alignment_mask = 0x7;
-			else
-				alignment_mask = 0x3;
-
-			/* Check if the watchpoint value matches. */
-			val = read_wb_reg(ARM_BASE_WVR + i);
-			if (val != (addr & ~alignment_mask))
-				goto unlock;
-
-			/* Possible match, check the byte address select. */
-			ctrl_reg = read_wb_reg(ARM_BASE_WCR + i);
-			decode_ctrl_reg(ctrl_reg, &ctrl);
-			if (!((1 << (addr & alignment_mask)) & ctrl.len))
-				goto unlock;
-
 			/* Check that the access type matches. */
 			if (debug_exception_updates_fsr()) {
 				access = (fsr & ARM_FSR_ACCESS_MASK) ?
 					  HW_BREAKPOINT_W : HW_BREAKPOINT_R;
 				if (!(access & hw_breakpoint_type(wp)))
-					goto unlock;
+					continue;
 			}
 
+			val = read_wb_reg(ARM_BASE_WVR + i);
+			ctrl_reg = read_wb_reg(ARM_BASE_WCR + i);
+			decode_ctrl_reg(ctrl_reg, &ctrl);
+			dist = get_distance_from_watchpoint(addr, val, &ctrl);
+			if (dist < min_dist) {
+				min_dist = dist;
+				closest_match = i;
+			}
+			/* Is this an exact match? */
+			if (dist != 0)
+				continue;
+
 			/* We have a winner. */
+			info = counter_arch_bp(wp);
 			info->trigger = addr;
 		}
 
@@ -748,10 +782,20 @@  static void watchpoint_handler(unsigned long addr, unsigned int fsr,
 		 */
 		if (is_default_overflow_handler(wp))
 			enable_single_step(wp, instruction_pointer(regs));
+	}
 
-unlock:
-		rcu_read_unlock();
+	if (min_dist > 0 && min_dist != -1) {
+		/* No exact match found. */
+		wp = slots[closest_match];
+		info = counter_arch_bp(wp);
+		info->trigger = addr;
+		pr_debug("watchpoint fired: address = 0x%x\n", info->trigger);
+		perf_bp_event(wp, regs);
+		if (is_default_overflow_handler(wp))
+			enable_single_step(wp, instruction_pointer(regs));
 	}
+
+	rcu_read_unlock();
 }
 
 static void watchpoint_single_step_handler(unsigned long pc)