
[5/8] perf/hw_breakpoint: Remove useless code related to flexible breakpoints

Message ID 20220609113046.780504-6-elver@google.com (mailing list archive)
State New, archived
Series perf/hw_breakpoint: Optimize for thousands of tasks

Commit Message

Marco Elver June 9, 2022, 11:30 a.m. UTC
Flexible breakpoints have never been implemented, with
bp_cpuinfo::flexible always being 0. Unfortunately, they still occupy 4
bytes in each bp_cpuinfo and bp_busy_slots, and the max flexible count
is still computed in fetch_bp_busy_slots().

This again causes suboptimal code generation, since we always know that
`!!slots.flexible` will be 0.

Just get rid of the flexible "placeholder" and remove all real code
related to it. Make a note in the comment describing the constraints
algorithm, but don't remove flexible breakpoints from the algorithm, so
that if they need to be supported in the future, it should be trivial
to revive them (along with reverting this change).

Signed-off-by: Marco Elver <elver@google.com>
---
 kernel/events/hw_breakpoint.c | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)
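
For illustration (this sketch is not part of the patch), a minimal
stand-alone C example of the code-generation point above; the struct
names and the slot limit of 4 are made up for the example, while the
real check lives in __reserve_bp_slot(). With "flexible" known to
always be 0, the "+ (!!slots.flexible)" term is dead weight that the
compiler must still load and test.

#include <stdio.h>

/* Before: a "flexible" field exists but is never set to anything but 0. */
struct busy_slots_old {
	unsigned int pinned;
	unsigned int flexible;
};

static int reserve_old(struct busy_slots_old s, unsigned int max_slots)
{
	/* (!!s.flexible) is always 0 in practice, yet the field must
	 * still be stored, loaded and tested here. */
	return (s.pinned + (!!s.flexible) > max_slots) ? -1 : 0;
}

/* After: only the pinned count is tracked and compared. */
static int reserve_new(unsigned int pinned, unsigned int max_slots)
{
	return (pinned > max_slots) ? -1 : 0;
}

int main(void)
{
	struct busy_slots_old s = { .pinned = 3, .flexible = 0 };

	/* Both checks agree; 4 mirrors the x86 debug register count. */
	printf("%d %d\n", reserve_old(s, 4), reserve_new(3, 4));
	return 0;
}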

Comments

Dmitry Vyukov June 9, 2022, 12:04 p.m. UTC | #1
On Thu, 9 Jun 2022 at 13:31, Marco Elver <elver@google.com> wrote:
>
> Flexible breakpoints have never been implemented, with
> bp_cpuinfo::flexible always being 0. Unfortunately, they still occupy 4
> bytes in each bp_cpuinfo and bp_busy_slots, and the max flexible count
> is still computed in fetch_bp_busy_slots().
>
> This again causes suboptimal code generation, since we always know that
> `!!slots.flexible` will be 0.
>
> Just get rid of the flexible "placeholder" and remove all real code
> related to it. Make a note in the comment describing the constraints
> algorithm, but don't remove flexible breakpoints from the algorithm, so
> that if they need to be supported in the future, it should be trivial
> to revive them (along with reverting this change).
>
> Signed-off-by: Marco Elver <elver@google.com>

Was added in 2009.

Acked-by: Dmitry Vyukov <dvyukov@google.com>

> ---
>  kernel/events/hw_breakpoint.c | 12 +++---------
>  1 file changed, 3 insertions(+), 9 deletions(-)
>
> diff --git a/kernel/events/hw_breakpoint.c b/kernel/events/hw_breakpoint.c
> index 5f40c8dfa042..afe0a6007e96 100644
> --- a/kernel/events/hw_breakpoint.c
> +++ b/kernel/events/hw_breakpoint.c
> @@ -46,8 +46,6 @@ struct bp_cpuinfo {
>  #else
>         unsigned int    *tsk_pinned;
>  #endif
> -       /* Number of non-pinned cpu/task breakpoints in a cpu */
> -       unsigned int    flexible; /* XXX: placeholder, see fetch_this_slot() */
>  };
>
>  static DEFINE_PER_CPU(struct bp_cpuinfo, bp_cpuinfo[TYPE_MAX]);
> @@ -71,7 +69,6 @@ static bool constraints_initialized __ro_after_init;
>  /* Gather the number of total pinned and un-pinned bp in a cpuset */
>  struct bp_busy_slots {
>         unsigned int pinned;
> -       unsigned int flexible;
>  };
>
>  /* Serialize accesses to the above constraints */
> @@ -213,10 +210,6 @@ fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp,
>
>                 if (nr > slots->pinned)
>                         slots->pinned = nr;
> -
> -               nr = info->flexible;
> -               if (nr > slots->flexible)
> -                       slots->flexible = nr;
>         }
>  }
>
> @@ -299,7 +292,8 @@ __weak void arch_unregister_hw_breakpoint(struct perf_event *bp)
>  }
>
>  /*
> - * Constraints to check before allowing this new breakpoint counter:
> + * Constraints to check before allowing this new breakpoint counter. Note that
> + * flexible breakpoints are currently unsupported -- see fetch_this_slot().
>   *
>   *  == Non-pinned counter == (Considered as pinned for now)
>   *
> @@ -366,7 +360,7 @@ static int __reserve_bp_slot(struct perf_event *bp, u64 bp_type)
>         fetch_this_slot(&slots, weight);
>
>         /* Flexible counters need to keep at least one slot */
> -       if (slots.pinned + (!!slots.flexible) > hw_breakpoint_slots_cached(type))
> +       if (slots.pinned > hw_breakpoint_slots_cached(type))
>                 return -ENOSPC;
>
>         ret = arch_reserve_bp_slot(bp);
> --
> 2.36.1.255.ge46751e96f-goog
>
Dmitry Vyukov June 9, 2022, 1:41 p.m. UTC | #2
On Thu, 9 Jun 2022 at 14:04, Dmitry Vyukov <dvyukov@google.com> wrote:
>
> On Thu, 9 Jun 2022 at 13:31, Marco Elver <elver@google.com> wrote:
> >
> > Flexible breakpoints have never been implemented, with
> > bp_cpuinfo::flexible always being 0. Unfortunately, they still occupy 4
> > bytes in each bp_cpuinfo and bp_busy_slots, and the max flexible count
> > is still computed in fetch_bp_busy_slots().
> >
> > This again causes suboptimal code generation, since we always know that
> > `!!slots.flexible` will be 0.
> >
> > Just get rid of the flexible "placeholder" and remove all real code
> > related to it. Make a note in the comment describing the constraints
> > algorithm, but don't remove flexible breakpoints from the algorithm, so
> > that if they need to be supported in the future, it should be trivial
> > to revive them (along with reverting this change).
> >
> > Signed-off-by: Marco Elver <elver@google.com>
>
> Was added in 2009.
>
> Acked-by: Dmitry Vyukov <dvyukov@google.com>
>
> > ---
> >  kernel/events/hw_breakpoint.c | 12 +++---------
> >  1 file changed, 3 insertions(+), 9 deletions(-)
> >
> > diff --git a/kernel/events/hw_breakpoint.c b/kernel/events/hw_breakpoint.c
> > index 5f40c8dfa042..afe0a6007e96 100644
> > --- a/kernel/events/hw_breakpoint.c
> > +++ b/kernel/events/hw_breakpoint.c
> > @@ -46,8 +46,6 @@ struct bp_cpuinfo {
> >  #else
> >         unsigned int    *tsk_pinned;
> >  #endif
> > -       /* Number of non-pinned cpu/task breakpoints in a cpu */
> > -       unsigned int    flexible; /* XXX: placeholder, see fetch_this_slot() */
> >  };
> >
> >  static DEFINE_PER_CPU(struct bp_cpuinfo, bp_cpuinfo[TYPE_MAX]);
> > @@ -71,7 +69,6 @@ static bool constraints_initialized __ro_after_init;
> >  /* Gather the number of total pinned and un-pinned bp in a cpuset */
> >  struct bp_busy_slots {

Do we also want to remove this struct altogether? Now it becomes just
an int counter.

> >         unsigned int pinned;
> > -       unsigned int flexible;
> >  };
> >
> >  /* Serialize accesses to the above constraints */
> > @@ -213,10 +210,6 @@ fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp,
> >
> >                 if (nr > slots->pinned)
> >                         slots->pinned = nr;
> > -
> > -               nr = info->flexible;
> > -               if (nr > slots->flexible)
> > -                       slots->flexible = nr;
> >         }
> >  }
> >
> > @@ -299,7 +292,8 @@ __weak void arch_unregister_hw_breakpoint(struct perf_event *bp)
> >  }
> >
> >  /*
> > - * Constraints to check before allowing this new breakpoint counter:
> > + * Constraints to check before allowing this new breakpoint counter. Note that
> > + * flexible breakpoints are currently unsupported -- see fetch_this_slot().
> >   *
> >   *  == Non-pinned counter == (Considered as pinned for now)
> >   *
> > @@ -366,7 +360,7 @@ static int __reserve_bp_slot(struct perf_event *bp, u64 bp_type)
> >         fetch_this_slot(&slots, weight);
> >
> >         /* Flexible counters need to keep at least one slot */
> > -       if (slots.pinned + (!!slots.flexible) > hw_breakpoint_slots_cached(type))
> > +       if (slots.pinned > hw_breakpoint_slots_cached(type))
> >                 return -ENOSPC;
> >
> >         ret = arch_reserve_bp_slot(bp);
> > --
> > 2.36.1.255.ge46751e96f-goog
> >
Marco Elver June 9, 2022, 2 p.m. UTC | #3
On Thu, 9 Jun 2022 at 15:41, Dmitry Vyukov <dvyukov@google.com> wrote:
>
> On Thu, 9 Jun 2022 at 14:04, Dmitry Vyukov <dvyukov@google.com> wrote:
> >
> > On Thu, 9 Jun 2022 at 13:31, Marco Elver <elver@google.com> wrote:
> > >
> > > Flexible breakpoints have never been implemented, with
> > > bp_cpuinfo::flexible always being 0. Unfortunately, they still occupy 4
> > > bytes in each bp_cpuinfo and bp_busy_slots, and the max flexible count
> > > is still computed in fetch_bp_busy_slots().
> > >
> > > This again causes suboptimal code generation, since we always know that
> > > `!!slots.flexible` will be 0.
> > >
> > > Just get rid of the flexible "placeholder" and remove all real code
> > > related to it. Make a note in the comment describing the constraints
> > > algorithm, but don't remove flexible breakpoints from the algorithm, so
> > > that if they need to be supported in the future, it should be trivial
> > > to revive them (along with reverting this change).
> > >
> > > Signed-off-by: Marco Elver <elver@google.com>
> >
> > Was added in 2009.
> >
> > Acked-by: Dmitry Vyukov <dvyukov@google.com>
> >
> > > ---
> > >  kernel/events/hw_breakpoint.c | 12 +++---------
> > >  1 file changed, 3 insertions(+), 9 deletions(-)
> > >
> > > diff --git a/kernel/events/hw_breakpoint.c b/kernel/events/hw_breakpoint.c
> > > index 5f40c8dfa042..afe0a6007e96 100644
> > > --- a/kernel/events/hw_breakpoint.c
> > > +++ b/kernel/events/hw_breakpoint.c
> > > @@ -46,8 +46,6 @@ struct bp_cpuinfo {
> > >  #else
> > >         unsigned int    *tsk_pinned;
> > >  #endif
> > > -       /* Number of non-pinned cpu/task breakpoints in a cpu */
> > > -       unsigned int    flexible; /* XXX: placeholder, see fetch_this_slot() */
> > >  };
> > >
> > >  static DEFINE_PER_CPU(struct bp_cpuinfo, bp_cpuinfo[TYPE_MAX]);
> > > @@ -71,7 +69,6 @@ static bool constraints_initialized __ro_after_init;
> > >  /* Gather the number of total pinned and un-pinned bp in a cpuset */
> > >  struct bp_busy_slots {
>
> Do we also want to remove this struct altogether? Now it becomes just
> an int counter.

Yes, that actually can simplify a bunch of things: fetch_bp_busy_slots()
can just return an int, and fetch_this_slot() can be removed (it'll be
even cleaner if we also remove the overridable weight).

I'll simplify unless I hear objections.
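
For concreteness, a rough sketch of the simplification being discussed,
assuming the existing helpers in kernel/events/hw_breakpoint.c
(get_bp_info(), max_task_bp_pinned(), task_bp_pinned(), cpumask_of_bp())
keep their current signatures; the name max_bp_pinned_slots() is made up
here, and the actual follow-up may end up looking different:

/* Sketch: bp_busy_slots and fetch_bp_busy_slots() collapse into a helper
 * that returns the maximum number of pinned slots used on any relevant CPU. */
static unsigned int
max_bp_pinned_slots(struct perf_event *bp, enum bp_type_idx type)
{
	const struct cpumask *cpumask = cpumask_of_bp(bp);
	unsigned int pinned_slots = 0;
	int cpu;

	for_each_cpu(cpu, cpumask) {
		struct bp_cpuinfo *info = get_bp_info(cpu, type);
		unsigned int nr;

		nr = info->cpu_pinned;
		if (!bp->hw.target)
			nr += max_task_bp_pinned(cpu, type);
		else
			nr += task_bp_pinned(cpu, bp, type);

		pinned_slots = max(nr, pinned_slots);
	}

	return pinned_slots;
}

The check in __reserve_bp_slot() would then fold the weight in directly,
something like:

	if (max_bp_pinned_slots(bp, type) + weight > hw_breakpoint_slots_cached(type))
		return -ENOSPC;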

Patch

diff --git a/kernel/events/hw_breakpoint.c b/kernel/events/hw_breakpoint.c
index 5f40c8dfa042..afe0a6007e96 100644
--- a/kernel/events/hw_breakpoint.c
+++ b/kernel/events/hw_breakpoint.c
@@ -46,8 +46,6 @@  struct bp_cpuinfo {
 #else
 	unsigned int	*tsk_pinned;
 #endif
-	/* Number of non-pinned cpu/task breakpoints in a cpu */
-	unsigned int	flexible; /* XXX: placeholder, see fetch_this_slot() */
 };
 
 static DEFINE_PER_CPU(struct bp_cpuinfo, bp_cpuinfo[TYPE_MAX]);
@@ -71,7 +69,6 @@  static bool constraints_initialized __ro_after_init;
 /* Gather the number of total pinned and un-pinned bp in a cpuset */
 struct bp_busy_slots {
 	unsigned int pinned;
-	unsigned int flexible;
 };
 
 /* Serialize accesses to the above constraints */
@@ -213,10 +210,6 @@  fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp,
 
 		if (nr > slots->pinned)
 			slots->pinned = nr;
-
-		nr = info->flexible;
-		if (nr > slots->flexible)
-			slots->flexible = nr;
 	}
 }
 
@@ -299,7 +292,8 @@  __weak void arch_unregister_hw_breakpoint(struct perf_event *bp)
 }
 
 /*
- * Constraints to check before allowing this new breakpoint counter:
+ * Constraints to check before allowing this new breakpoint counter. Note that
+ * flexible breakpoints are currently unsupported -- see fetch_this_slot().
  *
  *  == Non-pinned counter == (Considered as pinned for now)
  *
@@ -366,7 +360,7 @@  static int __reserve_bp_slot(struct perf_event *bp, u64 bp_type)
 	fetch_this_slot(&slots, weight);
 
 	/* Flexible counters need to keep at least one slot */
-	if (slots.pinned + (!!slots.flexible) > hw_breakpoint_slots_cached(type))
+	if (slots.pinned > hw_breakpoint_slots_cached(type))
 		return -ENOSPC;
 
 	ret = arch_reserve_bp_slot(bp);