diff mbox series

[07/34] perf/arm: optimize opencoded atomic find_bit() API

Message ID 20231118155105.25678-8-yury.norov@gmail.com (mailing list archive)
State New, archived
Headers show
Series None | expand

Commit Message

Yury Norov Nov. 18, 2023, 3:50 p.m. UTC
Switch subsystem to use atomic find_bit() or atomic iterators as
appropriate.

Signed-off-by: Yury Norov <yury.norov@gmail.com>
---
 drivers/perf/arm-cci.c        | 23 +++++------------------
 drivers/perf/arm-ccn.c        | 10 ++--------
 drivers/perf/arm_dmc620_pmu.c |  9 ++-------
 drivers/perf/arm_pmuv3.c      |  8 ++------
 4 files changed, 11 insertions(+), 39 deletions(-)

Comments

Will Deacon Nov. 21, 2023, 3:53 p.m. UTC | #1
On Sat, Nov 18, 2023 at 07:50:38AM -0800, Yury Norov wrote:
> Switch subsystem to use atomic find_bit() or atomic iterators as
> appropriate.
> 
> Signed-off-by: Yury Norov <yury.norov@gmail.com>
> ---
>  drivers/perf/arm-cci.c        | 23 +++++------------------
>  drivers/perf/arm-ccn.c        | 10 ++--------
>  drivers/perf/arm_dmc620_pmu.c |  9 ++-------
>  drivers/perf/arm_pmuv3.c      |  8 ++------
>  4 files changed, 11 insertions(+), 39 deletions(-)
> 
> diff --git a/drivers/perf/arm-cci.c b/drivers/perf/arm-cci.c
> index 61de861eaf91..70fbf9d09d37 100644
> --- a/drivers/perf/arm-cci.c
> +++ b/drivers/perf/arm-cci.c
> @@ -320,12 +320,8 @@ static int cci400_get_event_idx(struct cci_pmu *cci_pmu,
>  		return CCI400_PMU_CYCLE_CNTR_IDX;
>  	}
>  
> -	for (idx = CCI400_PMU_CNTR0_IDX; idx <= CCI_PMU_CNTR_LAST(cci_pmu); ++idx)
> -		if (!test_and_set_bit(idx, hw->used_mask))
> -			return idx;
> -
> -	/* No counters available */
> -	return -EAGAIN;
> +	idx = find_and_set_bit(hw->used_mask, CCI_PMU_CNTR_LAST(cci_pmu) + 1);

CCI400_PMU_CNTR0_IDX is defined as 1, so isn't this wrong?

[...]

> diff --git a/drivers/perf/arm_dmc620_pmu.c b/drivers/perf/arm_dmc620_pmu.c
> index 30cea6859574..e41c84dabc3e 100644
> --- a/drivers/perf/arm_dmc620_pmu.c
> +++ b/drivers/perf/arm_dmc620_pmu.c
> @@ -303,13 +303,8 @@ static int dmc620_get_event_idx(struct perf_event *event)
>  		end_idx = DMC620_PMU_MAX_COUNTERS;
>  	}
>  
> -	for (idx = start_idx; idx < end_idx; ++idx) {
> -		if (!test_and_set_bit(idx, dmc620_pmu->used_mask))
> -			return idx;
> -	}
> -
> -	/* The counters are all in use. */
> -	return -EAGAIN;
> +	idx = find_and_set_next_bit(dmc620_pmu->used_mask, end_idx, start_idx);

It might just be me, but I'd find this a tonne easier to read if you swapped
the last two arguments around so that the offset came before the limit in
the new function.

Will
Yury Norov Nov. 21, 2023, 4:16 p.m. UTC | #2
On Tue, Nov 21, 2023 at 03:53:44PM +0000, Will Deacon wrote:
> On Sat, Nov 18, 2023 at 07:50:38AM -0800, Yury Norov wrote:
> > Switch subsystem to use atomic find_bit() or atomic iterators as
> > appropriate.
> > 
> > Signed-off-by: Yury Norov <yury.norov@gmail.com>
> > ---
> >  drivers/perf/arm-cci.c        | 23 +++++------------------
> >  drivers/perf/arm-ccn.c        | 10 ++--------
> >  drivers/perf/arm_dmc620_pmu.c |  9 ++-------
> >  drivers/perf/arm_pmuv3.c      |  8 ++------
> >  4 files changed, 11 insertions(+), 39 deletions(-)
> > 
> > diff --git a/drivers/perf/arm-cci.c b/drivers/perf/arm-cci.c
> > index 61de861eaf91..70fbf9d09d37 100644
> > --- a/drivers/perf/arm-cci.c
> > +++ b/drivers/perf/arm-cci.c
> > @@ -320,12 +320,8 @@ static int cci400_get_event_idx(struct cci_pmu *cci_pmu,
> >  		return CCI400_PMU_CYCLE_CNTR_IDX;
> >  	}
> >  
> > -	for (idx = CCI400_PMU_CNTR0_IDX; idx <= CCI_PMU_CNTR_LAST(cci_pmu); ++idx)
> > -		if (!test_and_set_bit(idx, hw->used_mask))
> > -			return idx;
> > -
> > -	/* No counters available */
> > -	return -EAGAIN;
> > +	idx = find_and_set_bit(hw->used_mask, CCI_PMU_CNTR_LAST(cci_pmu) + 1);
> 
> CCI400_PMU_CNTR0_IDX is defined as 1, so isn't this wrong?

You're right. Will fix in v2
 
> [...]
> 
> > diff --git a/drivers/perf/arm_dmc620_pmu.c b/drivers/perf/arm_dmc620_pmu.c
> > index 30cea6859574..e41c84dabc3e 100644
> > --- a/drivers/perf/arm_dmc620_pmu.c
> > +++ b/drivers/perf/arm_dmc620_pmu.c
> > @@ -303,13 +303,8 @@ static int dmc620_get_event_idx(struct perf_event *event)
> >  		end_idx = DMC620_PMU_MAX_COUNTERS;
> >  	}
> >  
> > -	for (idx = start_idx; idx < end_idx; ++idx) {
> > -		if (!test_and_set_bit(idx, dmc620_pmu->used_mask))
> > -			return idx;
> > -	}
> > -
> > -	/* The counters are all in use. */
> > -	return -EAGAIN;
> > +	idx = find_and_set_next_bit(dmc620_pmu->used_mask, end_idx, start_idx);
> 
> It might just be me, but I'd find this a tonne easier to read if you swapped
> the last two arguments around so that the offset came before the limit in
> the new function.

I personally agree, but we already have find_next_*_bit(addr, nbits, offset)
functions, and having atomic versions of the same with different order
of arguments will make it even more messy...

Thanks,
        Yury
Will Deacon Nov. 21, 2023, 4:17 p.m. UTC | #3
On Tue, Nov 21, 2023 at 08:16:13AM -0800, Yury Norov wrote:
> On Tue, Nov 21, 2023 at 03:53:44PM +0000, Will Deacon wrote:
> > On Sat, Nov 18, 2023 at 07:50:38AM -0800, Yury Norov wrote:
> > > Switch subsystem to use atomic find_bit() or atomic iterators as
> > > appropriate.
> > > 
> > > Signed-off-by: Yury Norov <yury.norov@gmail.com>
> > > ---
> > >  drivers/perf/arm-cci.c        | 23 +++++------------------
> > >  drivers/perf/arm-ccn.c        | 10 ++--------
> > >  drivers/perf/arm_dmc620_pmu.c |  9 ++-------
> > >  drivers/perf/arm_pmuv3.c      |  8 ++------
> > >  4 files changed, 11 insertions(+), 39 deletions(-)
> > > 
> > > diff --git a/drivers/perf/arm-cci.c b/drivers/perf/arm-cci.c
> > > index 61de861eaf91..70fbf9d09d37 100644
> > > --- a/drivers/perf/arm-cci.c
> > > +++ b/drivers/perf/arm-cci.c
> > > @@ -320,12 +320,8 @@ static int cci400_get_event_idx(struct cci_pmu *cci_pmu,
> > >  		return CCI400_PMU_CYCLE_CNTR_IDX;
> > >  	}
> > >  
> > > -	for (idx = CCI400_PMU_CNTR0_IDX; idx <= CCI_PMU_CNTR_LAST(cci_pmu); ++idx)
> > > -		if (!test_and_set_bit(idx, hw->used_mask))
> > > -			return idx;
> > > -
> > > -	/* No counters available */
> > > -	return -EAGAIN;
> > > +	idx = find_and_set_bit(hw->used_mask, CCI_PMU_CNTR_LAST(cci_pmu) + 1);
> > 
> > CCI400_PMU_CNTR0_IDX is defined as 1, so isn't this wrong?
> 
> You're right. Will fix in v2
>  
> > [...]
> > 
> > > diff --git a/drivers/perf/arm_dmc620_pmu.c b/drivers/perf/arm_dmc620_pmu.c
> > > index 30cea6859574..e41c84dabc3e 100644
> > > --- a/drivers/perf/arm_dmc620_pmu.c
> > > +++ b/drivers/perf/arm_dmc620_pmu.c
> > > @@ -303,13 +303,8 @@ static int dmc620_get_event_idx(struct perf_event *event)
> > >  		end_idx = DMC620_PMU_MAX_COUNTERS;
> > >  	}
> > >  
> > > -	for (idx = start_idx; idx < end_idx; ++idx) {
> > > -		if (!test_and_set_bit(idx, dmc620_pmu->used_mask))
> > > -			return idx;
> > > -	}
> > > -
> > > -	/* The counters are all in use. */
> > > -	return -EAGAIN;
> > > +	idx = find_and_set_next_bit(dmc620_pmu->used_mask, end_idx, start_idx);
> > 
> > It might just be me, but I'd find this a tonne easier to read if you swapped
> > the last two arguments around so that the offset came before the limit in
> > the new function.
> 
> I personally agree, but we already have find_next_*_bit(addr, nbits, offset)
> functions, and having atomic versions of the same with different order
> of arguments will make it even more messy...

Urgh, and there's loads of them too :(

Will
diff mbox series

Patch

diff --git a/drivers/perf/arm-cci.c b/drivers/perf/arm-cci.c
index 61de861eaf91..70fbf9d09d37 100644
--- a/drivers/perf/arm-cci.c
+++ b/drivers/perf/arm-cci.c
@@ -320,12 +320,8 @@  static int cci400_get_event_idx(struct cci_pmu *cci_pmu,
 		return CCI400_PMU_CYCLE_CNTR_IDX;
 	}
 
-	for (idx = CCI400_PMU_CNTR0_IDX; idx <= CCI_PMU_CNTR_LAST(cci_pmu); ++idx)
-		if (!test_and_set_bit(idx, hw->used_mask))
-			return idx;
-
-	/* No counters available */
-	return -EAGAIN;
+	idx = find_and_set_bit(hw->used_mask, CCI_PMU_CNTR_LAST(cci_pmu) + 1);
+	return idx < CCI_PMU_CNTR_LAST(cci_pmu) + 1 ? idx : -EAGAIN;
 }
 
 static int cci400_validate_hw_event(struct cci_pmu *cci_pmu, unsigned long hw_event)
@@ -802,13 +798,8 @@  static int pmu_get_event_idx(struct cci_pmu_hw_events *hw, struct perf_event *ev
 	if (cci_pmu->model->get_event_idx)
 		return cci_pmu->model->get_event_idx(cci_pmu, hw, cci_event);
 
-	/* Generic code to find an unused idx from the mask */
-	for (idx = 0; idx <= CCI_PMU_CNTR_LAST(cci_pmu); idx++)
-		if (!test_and_set_bit(idx, hw->used_mask))
-			return idx;
-
-	/* No counters available */
-	return -EAGAIN;
+	idx = find_and_set_bit(hw->used_mask, CCI_PMU_CNTR_LAST(cci_pmu) + 1);
+	return idx < CCI_PMU_CNTR_LAST(cci_pmu) + 1 ? idx : -EAGAIN;
 }
 
 static int pmu_map_event(struct perf_event *event)
@@ -861,12 +852,8 @@  static void pmu_free_irq(struct cci_pmu *cci_pmu)
 {
 	int i;
 
-	for (i = 0; i < cci_pmu->nr_irqs; i++) {
-		if (!test_and_clear_bit(i, &cci_pmu->active_irqs))
-			continue;
-
+	for_each_test_and_clear_bit(i, &cci_pmu->active_irqs, cci_pmu->nr_irqs)
 		free_irq(cci_pmu->irqs[i], cci_pmu);
-	}
 }
 
 static u32 pmu_read_counter(struct perf_event *event)
diff --git a/drivers/perf/arm-ccn.c b/drivers/perf/arm-ccn.c
index 728d13d8e98a..d657701b1f23 100644
--- a/drivers/perf/arm-ccn.c
+++ b/drivers/perf/arm-ccn.c
@@ -589,15 +589,9 @@  static const struct attribute_group *arm_ccn_pmu_attr_groups[] = {
 
 static int arm_ccn_pmu_alloc_bit(unsigned long *bitmap, unsigned long size)
 {
-	int bit;
-
-	do {
-		bit = find_first_zero_bit(bitmap, size);
-		if (bit >= size)
-			return -EAGAIN;
-	} while (test_and_set_bit(bit, bitmap));
+	int bit = find_and_set_bit(bitmap, size);
 
-	return bit;
+	return bit < size ? bit : -EAGAIN;
 }
 
 /* All RN-I and RN-D nodes have identical PMUs */
diff --git a/drivers/perf/arm_dmc620_pmu.c b/drivers/perf/arm_dmc620_pmu.c
index 30cea6859574..e41c84dabc3e 100644
--- a/drivers/perf/arm_dmc620_pmu.c
+++ b/drivers/perf/arm_dmc620_pmu.c
@@ -303,13 +303,8 @@  static int dmc620_get_event_idx(struct perf_event *event)
 		end_idx = DMC620_PMU_MAX_COUNTERS;
 	}
 
-	for (idx = start_idx; idx < end_idx; ++idx) {
-		if (!test_and_set_bit(idx, dmc620_pmu->used_mask))
-			return idx;
-	}
-
-	/* The counters are all in use. */
-	return -EAGAIN;
+	idx = find_and_set_next_bit(dmc620_pmu->used_mask, end_idx, start_idx);
+	return idx < end_idx ? idx : -EAGAIN;
 }
 
 static inline
diff --git a/drivers/perf/arm_pmuv3.c b/drivers/perf/arm_pmuv3.c
index 18b91b56af1d..784b0383e9f8 100644
--- a/drivers/perf/arm_pmuv3.c
+++ b/drivers/perf/arm_pmuv3.c
@@ -825,13 +825,9 @@  static irqreturn_t armv8pmu_handle_irq(struct arm_pmu *cpu_pmu)
 static int armv8pmu_get_single_idx(struct pmu_hw_events *cpuc,
 				    struct arm_pmu *cpu_pmu)
 {
-	int idx;
+	int idx = find_and_set_next_bit(cpuc->used_mask, cpu_pmu->num_events, ARMV8_IDX_COUNTER0);
 
-	for (idx = ARMV8_IDX_COUNTER0; idx < cpu_pmu->num_events; idx++) {
-		if (!test_and_set_bit(idx, cpuc->used_mask))
-			return idx;
-	}
-	return -EAGAIN;
+	return idx < cpu_pmu->num_events ? idx : -EAGAIN;
 }
 
 static int armv8pmu_get_chain_idx(struct pmu_hw_events *cpuc,