diff mbox series

[2/2] drm/i915/pmu: Make PMU sample array two-dimensional

Message ID 20230523151918.4170499-3-ashutosh.dixit@intel.com (mailing list archive)
State New, archived
Headers show
Series drm/i915/pmu: couple of cleanups | expand

Commit Message

Dixit, Ashutosh May 23, 2023, 3:19 p.m. UTC
No functional changes but we can remove some unsightly index computation
and read/write functions if we convert the PMU sample array from a
one-dimensional to a two-dimensional array.

Suggested-by: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
---
 drivers/gpu/drm/i915/i915_pmu.c | 60 ++++++++++-----------------------
 drivers/gpu/drm/i915/i915_pmu.h |  2 +-
 2 files changed, 19 insertions(+), 43 deletions(-)

Comments

Andrzej Hajda May 24, 2023, 9:14 a.m. UTC | #1
On 23.05.2023 17:19, Ashutosh Dixit wrote:
> No functional changes but we can remove some unsightly index computation
> and read/write functions if we convert the PMU sample array from a
> one-dimensional to a two-dimensional array.
> 
> Suggested-by: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
> Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>

Reviewed-by: Andrzej Hajda <andrzej.hajda@intel.com>

Regards
Andrzej

> ---
>   drivers/gpu/drm/i915/i915_pmu.c | 60 ++++++++++-----------------------
>   drivers/gpu/drm/i915/i915_pmu.h |  2 +-
>   2 files changed, 19 insertions(+), 43 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
> index b47d890d4ada1..137e0df9573ee 100644
> --- a/drivers/gpu/drm/i915/i915_pmu.c
> +++ b/drivers/gpu/drm/i915/i915_pmu.c
> @@ -195,33 +195,6 @@ static inline s64 ktime_since_raw(const ktime_t kt)
>   	return ktime_to_ns(ktime_sub(ktime_get_raw(), kt));
>   }
>   
> -static unsigned int
> -__sample_idx(struct i915_pmu *pmu, unsigned int gt_id, int sample)
> -{
> -	unsigned int idx = gt_id * __I915_NUM_PMU_SAMPLERS + sample;
> -
> -	GEM_BUG_ON(idx >= ARRAY_SIZE(pmu->sample));
> -
> -	return idx;
> -}
> -
> -static u64 read_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample)
> -{
> -	return pmu->sample[__sample_idx(pmu, gt_id, sample)].cur;
> -}
> -
> -static void
> -store_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample, u64 val)
> -{
> -	pmu->sample[__sample_idx(pmu, gt_id, sample)].cur = val;
> -}
> -
> -static void
> -add_sample_mult(struct i915_pmu *pmu, unsigned int gt_id, int sample, u32 val, u32 mul)
> -{
> -	pmu->sample[__sample_idx(pmu, gt_id, sample)].cur += mul_u32_u32(val, mul);
> -}
> -
>   static u64 get_rc6(struct intel_gt *gt)
>   {
>   	struct drm_i915_private *i915 = gt->i915;
> @@ -240,7 +213,7 @@ static u64 get_rc6(struct intel_gt *gt)
>   	spin_lock_irqsave(&pmu->lock, flags);
>   
>   	if (awake) {
> -		store_sample(pmu, gt_id, __I915_SAMPLE_RC6, val);
> +		pmu->sample[gt_id][__I915_SAMPLE_RC6].cur = val;
>   	} else {
>   		/*
>   		 * We think we are runtime suspended.
> @@ -250,13 +223,13 @@ static u64 get_rc6(struct intel_gt *gt)
>   		 * counter value.
>   		 */
>   		val = ktime_since_raw(pmu->sleep_last[gt_id]);
> -		val += read_sample(pmu, gt_id, __I915_SAMPLE_RC6);
> +		val += pmu->sample[gt_id][__I915_SAMPLE_RC6].cur;
>   	}
>   
> -	if (val < read_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED))
> -		val = read_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED);
> +	if (val < pmu->sample[gt_id][__I915_SAMPLE_RC6_LAST_REPORTED].cur)
> +		val = pmu->sample[gt_id][__I915_SAMPLE_RC6_LAST_REPORTED].cur;
>   	else
> -		store_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED, val);
> +		pmu->sample[gt_id][__I915_SAMPLE_RC6_LAST_REPORTED].cur = val;
>   
>   	spin_unlock_irqrestore(&pmu->lock, flags);
>   
> @@ -275,9 +248,8 @@ static void init_rc6(struct i915_pmu *pmu)
>   		with_intel_runtime_pm(gt->uncore->rpm, wakeref) {
>   			u64 val = __get_rc6(gt);
>   
> -			store_sample(pmu, i, __I915_SAMPLE_RC6, val);
> -			store_sample(pmu, i, __I915_SAMPLE_RC6_LAST_REPORTED,
> -				     val);
> +			pmu->sample[i][__I915_SAMPLE_RC6].cur = val;
> +			pmu->sample[i][__I915_SAMPLE_RC6_LAST_REPORTED].cur = val;
>   			pmu->sleep_last[i] = ktime_get_raw();
>   		}
>   	}
> @@ -287,7 +259,7 @@ static void park_rc6(struct intel_gt *gt)
>   {
>   	struct i915_pmu *pmu = &gt->i915->pmu;
>   
> -	store_sample(pmu, gt->info.id, __I915_SAMPLE_RC6, __get_rc6(gt));
> +	pmu->sample[gt->info.id][__I915_SAMPLE_RC6].cur = __get_rc6(gt);
>   	pmu->sleep_last[gt->info.id] = ktime_get_raw();
>   }
>   
> @@ -428,6 +400,12 @@ engines_sample(struct intel_gt *gt, unsigned int period_ns)
>   	}
>   }
>   
> +static void
> +add_sample_mult(struct i915_pmu_sample *sample, u32 val, u32 mul)
> +{
> +	sample->cur += mul_u32_u32(val, mul);
> +}
> +
>   static bool
>   frequency_sampling_enabled(struct i915_pmu *pmu, unsigned int gt)
>   {
> @@ -467,12 +445,12 @@ frequency_sample(struct intel_gt *gt, unsigned int period_ns)
>   		if (!val)
>   			val = intel_gpu_freq(rps, rps->cur_freq);
>   
> -		add_sample_mult(pmu, gt_id, __I915_SAMPLE_FREQ_ACT,
> +		add_sample_mult(&pmu->sample[gt_id][__I915_SAMPLE_FREQ_ACT],
>   				val, period_ns / 1000);
>   	}
>   
>   	if (pmu->enable & config_mask(__I915_PMU_REQUESTED_FREQUENCY(gt_id))) {
> -		add_sample_mult(pmu, gt_id, __I915_SAMPLE_FREQ_REQ,
> +		add_sample_mult(&pmu->sample[gt_id][__I915_SAMPLE_FREQ_REQ],
>   				intel_rps_get_requested_frequency(rps),
>   				period_ns / 1000);
>   	}
> @@ -673,14 +651,12 @@ static u64 __i915_pmu_event_read(struct perf_event *event)
>   		switch (config) {
>   		case I915_PMU_ACTUAL_FREQUENCY:
>   			val =
> -			   div_u64(read_sample(pmu, gt_id,
> -					       __I915_SAMPLE_FREQ_ACT),
> +			   div_u64(pmu->sample[gt_id][__I915_SAMPLE_FREQ_ACT].cur,
>   				   USEC_PER_SEC /* to MHz */);
>   			break;
>   		case I915_PMU_REQUESTED_FREQUENCY:
>   			val =
> -			   div_u64(read_sample(pmu, gt_id,
> -					       __I915_SAMPLE_FREQ_REQ),
> +			   div_u64(pmu->sample[gt_id][__I915_SAMPLE_FREQ_REQ].cur,
>   				   USEC_PER_SEC /* to MHz */);
>   			break;
>   		case I915_PMU_INTERRUPTS:
> diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h
> index 33d80fbaab8bc..d20592e7db999 100644
> --- a/drivers/gpu/drm/i915/i915_pmu.h
> +++ b/drivers/gpu/drm/i915/i915_pmu.h
> @@ -127,7 +127,7 @@ struct i915_pmu {
>   	 * Only global counters are held here, while the per-engine ones are in
>   	 * struct intel_engine_cs.
>   	 */
> -	struct i915_pmu_sample sample[I915_PMU_MAX_GTS * __I915_NUM_PMU_SAMPLERS];
> +	struct i915_pmu_sample sample[I915_PMU_MAX_GTS][__I915_NUM_PMU_SAMPLERS];
>   	/**
>   	 * @sleep_last: Last time GT parked for RC6 estimation.
>   	 */
Tvrtko Ursulin May 24, 2023, 11:38 a.m. UTC | #2
On 23/05/2023 16:19, Ashutosh Dixit wrote:
> No functional changes but we can remove some unsightly index computation
> and read/write functions if we convert the PMU sample array from a
> one-dimensional to a two-dimensional array.
> 
> Suggested-by: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
> Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_pmu.c | 60 ++++++++++-----------------------
>   drivers/gpu/drm/i915/i915_pmu.h |  2 +-
>   2 files changed, 19 insertions(+), 43 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
> index b47d890d4ada1..137e0df9573ee 100644
> --- a/drivers/gpu/drm/i915/i915_pmu.c
> +++ b/drivers/gpu/drm/i915/i915_pmu.c
> @@ -195,33 +195,6 @@ static inline s64 ktime_since_raw(const ktime_t kt)
>   	return ktime_to_ns(ktime_sub(ktime_get_raw(), kt));
>   }
>   
> -static unsigned int
> -__sample_idx(struct i915_pmu *pmu, unsigned int gt_id, int sample)
> -{
> -	unsigned int idx = gt_id * __I915_NUM_PMU_SAMPLERS + sample;
> -
> -	GEM_BUG_ON(idx >= ARRAY_SIZE(pmu->sample));
> -
> -	return idx;
> -}
> -
> -static u64 read_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample)
> -{
> -	return pmu->sample[__sample_idx(pmu, gt_id, sample)].cur;
> -}
> -
> -static void
> -store_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample, u64 val)
> -{
> -	pmu->sample[__sample_idx(pmu, gt_id, sample)].cur = val;
> -}
> -
> -static void
> -add_sample_mult(struct i915_pmu *pmu, unsigned int gt_id, int sample, u32 val, u32 mul)
> -{
> -	pmu->sample[__sample_idx(pmu, gt_id, sample)].cur += mul_u32_u32(val, mul);
> -}

IMO read and store helpers could have stayed and just changed the 
implementation. Like add_sample_mult which you just moved. I would have 
been a smaller patch. So dunno.. a bit of a reluctant r-b.

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Regards,

Tvrtko

> -
>   static u64 get_rc6(struct intel_gt *gt)
>   {
>   	struct drm_i915_private *i915 = gt->i915;
> @@ -240,7 +213,7 @@ static u64 get_rc6(struct intel_gt *gt)
>   	spin_lock_irqsave(&pmu->lock, flags);
>   
>   	if (awake) {
> -		store_sample(pmu, gt_id, __I915_SAMPLE_RC6, val);
> +		pmu->sample[gt_id][__I915_SAMPLE_RC6].cur = val;
>   	} else {
>   		/*
>   		 * We think we are runtime suspended.
> @@ -250,13 +223,13 @@ static u64 get_rc6(struct intel_gt *gt)
>   		 * counter value.
>   		 */
>   		val = ktime_since_raw(pmu->sleep_last[gt_id]);
> -		val += read_sample(pmu, gt_id, __I915_SAMPLE_RC6);
> +		val += pmu->sample[gt_id][__I915_SAMPLE_RC6].cur;
>   	}
>   
> -	if (val < read_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED))
> -		val = read_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED);
> +	if (val < pmu->sample[gt_id][__I915_SAMPLE_RC6_LAST_REPORTED].cur)
> +		val = pmu->sample[gt_id][__I915_SAMPLE_RC6_LAST_REPORTED].cur;
>   	else
> -		store_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED, val);
> +		pmu->sample[gt_id][__I915_SAMPLE_RC6_LAST_REPORTED].cur = val;
>   
>   	spin_unlock_irqrestore(&pmu->lock, flags);
>   
> @@ -275,9 +248,8 @@ static void init_rc6(struct i915_pmu *pmu)
>   		with_intel_runtime_pm(gt->uncore->rpm, wakeref) {
>   			u64 val = __get_rc6(gt);
>   
> -			store_sample(pmu, i, __I915_SAMPLE_RC6, val);
> -			store_sample(pmu, i, __I915_SAMPLE_RC6_LAST_REPORTED,
> -				     val);
> +			pmu->sample[i][__I915_SAMPLE_RC6].cur = val;
> +			pmu->sample[i][__I915_SAMPLE_RC6_LAST_REPORTED].cur = val;
>   			pmu->sleep_last[i] = ktime_get_raw();
>   		}
>   	}
> @@ -287,7 +259,7 @@ static void park_rc6(struct intel_gt *gt)
>   {
>   	struct i915_pmu *pmu = &gt->i915->pmu;
>   
> -	store_sample(pmu, gt->info.id, __I915_SAMPLE_RC6, __get_rc6(gt));
> +	pmu->sample[gt->info.id][__I915_SAMPLE_RC6].cur = __get_rc6(gt);
>   	pmu->sleep_last[gt->info.id] = ktime_get_raw();
>   }
>   
> @@ -428,6 +400,12 @@ engines_sample(struct intel_gt *gt, unsigned int period_ns)
>   	}
>   }
>   
> +static void
> +add_sample_mult(struct i915_pmu_sample *sample, u32 val, u32 mul)
> +{
> +	sample->cur += mul_u32_u32(val, mul);
> +}
> +
>   static bool
>   frequency_sampling_enabled(struct i915_pmu *pmu, unsigned int gt)
>   {
> @@ -467,12 +445,12 @@ frequency_sample(struct intel_gt *gt, unsigned int period_ns)
>   		if (!val)
>   			val = intel_gpu_freq(rps, rps->cur_freq);
>   
> -		add_sample_mult(pmu, gt_id, __I915_SAMPLE_FREQ_ACT,
> +		add_sample_mult(&pmu->sample[gt_id][__I915_SAMPLE_FREQ_ACT],
>   				val, period_ns / 1000);
>   	}
>   
>   	if (pmu->enable & config_mask(__I915_PMU_REQUESTED_FREQUENCY(gt_id))) {
> -		add_sample_mult(pmu, gt_id, __I915_SAMPLE_FREQ_REQ,
> +		add_sample_mult(&pmu->sample[gt_id][__I915_SAMPLE_FREQ_REQ],
>   				intel_rps_get_requested_frequency(rps),
>   				period_ns / 1000);
>   	}
> @@ -673,14 +651,12 @@ static u64 __i915_pmu_event_read(struct perf_event *event)
>   		switch (config) {
>   		case I915_PMU_ACTUAL_FREQUENCY:
>   			val =
> -			   div_u64(read_sample(pmu, gt_id,
> -					       __I915_SAMPLE_FREQ_ACT),
> +			   div_u64(pmu->sample[gt_id][__I915_SAMPLE_FREQ_ACT].cur,
>   				   USEC_PER_SEC /* to MHz */);
>   			break;
>   		case I915_PMU_REQUESTED_FREQUENCY:
>   			val =
> -			   div_u64(read_sample(pmu, gt_id,
> -					       __I915_SAMPLE_FREQ_REQ),
> +			   div_u64(pmu->sample[gt_id][__I915_SAMPLE_FREQ_REQ].cur,
>   				   USEC_PER_SEC /* to MHz */);
>   			break;
>   		case I915_PMU_INTERRUPTS:
> diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h
> index 33d80fbaab8bc..d20592e7db999 100644
> --- a/drivers/gpu/drm/i915/i915_pmu.h
> +++ b/drivers/gpu/drm/i915/i915_pmu.h
> @@ -127,7 +127,7 @@ struct i915_pmu {
>   	 * Only global counters are held here, while the per-engine ones are in
>   	 * struct intel_engine_cs.
>   	 */
> -	struct i915_pmu_sample sample[I915_PMU_MAX_GTS * __I915_NUM_PMU_SAMPLERS];
> +	struct i915_pmu_sample sample[I915_PMU_MAX_GTS][__I915_NUM_PMU_SAMPLERS];
>   	/**
>   	 * @sleep_last: Last time GT parked for RC6 estimation.
>   	 */
Dixit, Ashutosh May 24, 2023, 5:38 p.m. UTC | #3
On Wed, 24 May 2023 04:38:18 -0700, Tvrtko Ursulin wrote:
>

Hi Tvrtko,

> On 23/05/2023 16:19, Ashutosh Dixit wrote:
> > No functional changes but we can remove some unsightly index computation
> > and read/write functions if we convert the PMU sample array from a
> > one-dimensional to a two-dimensional array.
> >
> > Suggested-by: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
> > Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
> > ---
> >   drivers/gpu/drm/i915/i915_pmu.c | 60 ++++++++++-----------------------
> >   drivers/gpu/drm/i915/i915_pmu.h |  2 +-
> >   2 files changed, 19 insertions(+), 43 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
> > index b47d890d4ada1..137e0df9573ee 100644
> > --- a/drivers/gpu/drm/i915/i915_pmu.c
> > +++ b/drivers/gpu/drm/i915/i915_pmu.c
> > @@ -195,33 +195,6 @@ static inline s64 ktime_since_raw(const ktime_t kt)
> >	return ktime_to_ns(ktime_sub(ktime_get_raw(), kt));
> >   }
> >   -static unsigned int
> > -__sample_idx(struct i915_pmu *pmu, unsigned int gt_id, int sample)
> > -{
> > -	unsigned int idx = gt_id * __I915_NUM_PMU_SAMPLERS + sample;
> > -
> > -	GEM_BUG_ON(idx >= ARRAY_SIZE(pmu->sample));
> > -
> > -	return idx;
> > -}
> > -
> > -static u64 read_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample)
> > -{
> > -	return pmu->sample[__sample_idx(pmu, gt_id, sample)].cur;
> > -}
> > -
> > -static void
> > -store_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample, u64 val)
> > -{
> > -	pmu->sample[__sample_idx(pmu, gt_id, sample)].cur = val;
> > -}
> > -
> > -static void
> > -add_sample_mult(struct i915_pmu *pmu, unsigned int gt_id, int sample, u32 val, u32 mul)
> > -{
> > -	pmu->sample[__sample_idx(pmu, gt_id, sample)].cur += mul_u32_u32(val, mul);
> > -}
>
> IMO read and store helpers could have stayed and just changed the
> implementation. Like add_sample_mult which you just moved. I would have
> been a smaller patch. So dunno.. a bit of a reluctant r-b.

Are you referring just to add_sample_mult or to all the other functions
too? add_sample_mult I moved it to where it was before bc4be0a38b63
("drm/i915/pmu: Prepare for multi-tile non-engine counters"), could have
left it here I guess.

The other read and store helpers are not needed with the 2-d array at all
since the compiler itself will do that, so I thought it was better to get
rid of them completely.

Let me know if you want any changes, otherwise I will leave as is.

> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Thanks for the review. Thanks Andrzej too :)
--
Ashutosh

> > -
> >   static u64 get_rc6(struct intel_gt *gt)
> >   {
> >	struct drm_i915_private *i915 = gt->i915;
> > @@ -240,7 +213,7 @@ static u64 get_rc6(struct intel_gt *gt)
> >	spin_lock_irqsave(&pmu->lock, flags);
> >		if (awake) {
> > -		store_sample(pmu, gt_id, __I915_SAMPLE_RC6, val);
> > +		pmu->sample[gt_id][__I915_SAMPLE_RC6].cur = val;
> >	} else {
> >		/*
> >		 * We think we are runtime suspended.
> > @@ -250,13 +223,13 @@ static u64 get_rc6(struct intel_gt *gt)
> >		 * counter value.
> >		 */
> >		val = ktime_since_raw(pmu->sleep_last[gt_id]);
> > -		val += read_sample(pmu, gt_id, __I915_SAMPLE_RC6);
> > +		val += pmu->sample[gt_id][__I915_SAMPLE_RC6].cur;
> >	}
> >   -	if (val < read_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED))
> > -		val = read_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED);
> > +	if (val < pmu->sample[gt_id][__I915_SAMPLE_RC6_LAST_REPORTED].cur)
> > +		val = pmu->sample[gt_id][__I915_SAMPLE_RC6_LAST_REPORTED].cur;
> >	else
> > -		store_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED, val);
> > +		pmu->sample[gt_id][__I915_SAMPLE_RC6_LAST_REPORTED].cur = val;
> >		spin_unlock_irqrestore(&pmu->lock, flags);
> >   @@ -275,9 +248,8 @@ static void init_rc6(struct i915_pmu *pmu)
> >		with_intel_runtime_pm(gt->uncore->rpm, wakeref) {
> >			u64 val = __get_rc6(gt);
> >   -			store_sample(pmu, i, __I915_SAMPLE_RC6, val);
> > -			store_sample(pmu, i, __I915_SAMPLE_RC6_LAST_REPORTED,
> > -				     val);
> > +			pmu->sample[i][__I915_SAMPLE_RC6].cur = val;
> > +			pmu->sample[i][__I915_SAMPLE_RC6_LAST_REPORTED].cur = val;
> >			pmu->sleep_last[i] = ktime_get_raw();
> >		}
> >	}
> > @@ -287,7 +259,7 @@ static void park_rc6(struct intel_gt *gt)
> >   {
> >	struct i915_pmu *pmu = &gt->i915->pmu;
> >   -	store_sample(pmu, gt->info.id, __I915_SAMPLE_RC6, __get_rc6(gt));
> > +	pmu->sample[gt->info.id][__I915_SAMPLE_RC6].cur = __get_rc6(gt);
> >	pmu->sleep_last[gt->info.id] = ktime_get_raw();
> >   }
> >   @@ -428,6 +400,12 @@ engines_sample(struct intel_gt *gt, unsigned int
> > period_ns)
> >	}
> >   }
> >   +static void
> > +add_sample_mult(struct i915_pmu_sample *sample, u32 val, u32 mul)
> > +{
> > +	sample->cur += mul_u32_u32(val, mul);
> > +}
> > +
> >   static bool
> >   frequency_sampling_enabled(struct i915_pmu *pmu, unsigned int gt)
> >   {
> > @@ -467,12 +445,12 @@ frequency_sample(struct intel_gt *gt, unsigned int period_ns)
> >		if (!val)
> >			val = intel_gpu_freq(rps, rps->cur_freq);
> >   -		add_sample_mult(pmu, gt_id, __I915_SAMPLE_FREQ_ACT,
> > +		add_sample_mult(&pmu->sample[gt_id][__I915_SAMPLE_FREQ_ACT],
> >				val, period_ns / 1000);
> >	}
> >		if (pmu->enable &
> > config_mask(__I915_PMU_REQUESTED_FREQUENCY(gt_id))) {
> > -		add_sample_mult(pmu, gt_id, __I915_SAMPLE_FREQ_REQ,
> > +		add_sample_mult(&pmu->sample[gt_id][__I915_SAMPLE_FREQ_REQ],
> >				intel_rps_get_requested_frequency(rps),
> >				period_ns / 1000);
> >	}
> > @@ -673,14 +651,12 @@ static u64 __i915_pmu_event_read(struct perf_event *event)
> >		switch (config) {
> >		case I915_PMU_ACTUAL_FREQUENCY:
> >			val =
> > -			   div_u64(read_sample(pmu, gt_id,
> > -					       __I915_SAMPLE_FREQ_ACT),
> > +			   div_u64(pmu->sample[gt_id][__I915_SAMPLE_FREQ_ACT].cur,
> >				   USEC_PER_SEC /* to MHz */);
> >			break;
> >		case I915_PMU_REQUESTED_FREQUENCY:
> >			val =
> > -			   div_u64(read_sample(pmu, gt_id,
> > -					       __I915_SAMPLE_FREQ_REQ),
> > +			   div_u64(pmu->sample[gt_id][__I915_SAMPLE_FREQ_REQ].cur,
> >				   USEC_PER_SEC /* to MHz */);
> >			break;
> >		case I915_PMU_INTERRUPTS:
> > diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h
> > index 33d80fbaab8bc..d20592e7db999 100644
> > --- a/drivers/gpu/drm/i915/i915_pmu.h
> > +++ b/drivers/gpu/drm/i915/i915_pmu.h
> > @@ -127,7 +127,7 @@ struct i915_pmu {
> >	 * Only global counters are held here, while the per-engine ones are in
> >	 * struct intel_engine_cs.
> >	 */
> > -	struct i915_pmu_sample sample[I915_PMU_MAX_GTS * __I915_NUM_PMU_SAMPLERS];
> > +	struct i915_pmu_sample sample[I915_PMU_MAX_GTS][__I915_NUM_PMU_SAMPLERS];
> >	/**
> >	 * @sleep_last: Last time GT parked for RC6 estimation.
> >	 */
Tvrtko Ursulin May 24, 2023, 5:53 p.m. UTC | #4
On 24/05/2023 18:38, Dixit, Ashutosh wrote:
> On Wed, 24 May 2023 04:38:18 -0700, Tvrtko Ursulin wrote:
>>
> 
> Hi Tvrtko,
> 
>> On 23/05/2023 16:19, Ashutosh Dixit wrote:
>>> No functional changes but we can remove some unsightly index computation
>>> and read/write functions if we convert the PMU sample array from a
>>> one-dimensional to a two-dimensional array.
>>>
>>> Suggested-by: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
>>> Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
>>> ---
>>>    drivers/gpu/drm/i915/i915_pmu.c | 60 ++++++++++-----------------------
>>>    drivers/gpu/drm/i915/i915_pmu.h |  2 +-
>>>    2 files changed, 19 insertions(+), 43 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
>>> index b47d890d4ada1..137e0df9573ee 100644
>>> --- a/drivers/gpu/drm/i915/i915_pmu.c
>>> +++ b/drivers/gpu/drm/i915/i915_pmu.c
>>> @@ -195,33 +195,6 @@ static inline s64 ktime_since_raw(const ktime_t kt)
>>> 	return ktime_to_ns(ktime_sub(ktime_get_raw(), kt));
>>>    }
>>>    -static unsigned int
>>> -__sample_idx(struct i915_pmu *pmu, unsigned int gt_id, int sample)
>>> -{
>>> -	unsigned int idx = gt_id * __I915_NUM_PMU_SAMPLERS + sample;
>>> -
>>> -	GEM_BUG_ON(idx >= ARRAY_SIZE(pmu->sample));
>>> -
>>> -	return idx;
>>> -}
>>> -
>>> -static u64 read_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample)
>>> -{
>>> -	return pmu->sample[__sample_idx(pmu, gt_id, sample)].cur;
>>> -}
>>> -
>>> -static void
>>> -store_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample, u64 val)
>>> -{
>>> -	pmu->sample[__sample_idx(pmu, gt_id, sample)].cur = val;
>>> -}
>>> -
>>> -static void
>>> -add_sample_mult(struct i915_pmu *pmu, unsigned int gt_id, int sample, u32 val, u32 mul)
>>> -{
>>> -	pmu->sample[__sample_idx(pmu, gt_id, sample)].cur += mul_u32_u32(val, mul);
>>> -}
>>
>> IMO read and store helpers could have stayed and just changed the
>> implementation. Like add_sample_mult which you just moved. I would have
>> been a smaller patch. So dunno.. a bit of a reluctant r-b.
> 
> Are you referring just to add_sample_mult or to all the other functions
> too? add_sample_mult I moved it to where it was before bc4be0a38b63

Read and store helpers.

> ("drm/i915/pmu: Prepare for multi-tile non-engine counters"), could have
> left it here I guess.
> 
> The other read and store helpers are not needed with the 2-d array at all
> since the compiler itself will do that, so I thought it was better to get
> rid of them completely.

Yes I get it, just that I didn't see the benefit of removing them.

For example:

  -		store_sample(pmu, gt_id, __I915_SAMPLE_RC6, val);
  +		pmu->sample[gt_id][__I915_SAMPLE_RC6].cur = val;

It's a meh for me. Either flavour looks fine to me so I would have erred 
on the side of keeping the patch small. If anything I probably slightly 
prefer that the struct pmu_sample implementation was able to be changed 
with less churn before. For example. But a very minor argument really.

Or maybe next step is get rid of the struct i915_pmu_sample. It is a 
struct because originally previous value was tracked too. Then I removed 
that and it was easier to keep the struct. I guess it can go now and 
then the removal of helpers here will look somewhat nicer without the 
trailing .cur on every affected line.

> Let me know if you want any changes, otherwise I will leave as is.

You can leave it as is, I dont' mind much.

Regards,

Tvrtko

>> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> 
> Thanks for the review. Thanks Andrzej too :)
> --
> Ashutosh
> 
>>> -
>>>    static u64 get_rc6(struct intel_gt *gt)
>>>    {
>>> 	struct drm_i915_private *i915 = gt->i915;
>>> @@ -240,7 +213,7 @@ static u64 get_rc6(struct intel_gt *gt)
>>> 	spin_lock_irqsave(&pmu->lock, flags);
>>> 		if (awake) {
>>> -		store_sample(pmu, gt_id, __I915_SAMPLE_RC6, val);
>>> +		pmu->sample[gt_id][__I915_SAMPLE_RC6].cur = val;
>>> 	} else {
>>> 		/*
>>> 		 * We think we are runtime suspended.
>>> @@ -250,13 +223,13 @@ static u64 get_rc6(struct intel_gt *gt)
>>> 		 * counter value.
>>> 		 */
>>> 		val = ktime_since_raw(pmu->sleep_last[gt_id]);
>>> -		val += read_sample(pmu, gt_id, __I915_SAMPLE_RC6);
>>> +		val += pmu->sample[gt_id][__I915_SAMPLE_RC6].cur;
>>> 	}
>>>    -	if (val < read_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED))
>>> -		val = read_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED);
>>> +	if (val < pmu->sample[gt_id][__I915_SAMPLE_RC6_LAST_REPORTED].cur)
>>> +		val = pmu->sample[gt_id][__I915_SAMPLE_RC6_LAST_REPORTED].cur;
>>> 	else
>>> -		store_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED, val);
>>> +		pmu->sample[gt_id][__I915_SAMPLE_RC6_LAST_REPORTED].cur = val;
>>> 		spin_unlock_irqrestore(&pmu->lock, flags);
>>>    @@ -275,9 +248,8 @@ static void init_rc6(struct i915_pmu *pmu)
>>> 		with_intel_runtime_pm(gt->uncore->rpm, wakeref) {
>>> 			u64 val = __get_rc6(gt);
>>>    -			store_sample(pmu, i, __I915_SAMPLE_RC6, val);
>>> -			store_sample(pmu, i, __I915_SAMPLE_RC6_LAST_REPORTED,
>>> -				     val);
>>> +			pmu->sample[i][__I915_SAMPLE_RC6].cur = val;
>>> +			pmu->sample[i][__I915_SAMPLE_RC6_LAST_REPORTED].cur = val;
>>> 			pmu->sleep_last[i] = ktime_get_raw();
>>> 		}
>>> 	}
>>> @@ -287,7 +259,7 @@ static void park_rc6(struct intel_gt *gt)
>>>    {
>>> 	struct i915_pmu *pmu = &gt->i915->pmu;
>>>    -	store_sample(pmu, gt->info.id, __I915_SAMPLE_RC6, __get_rc6(gt));
>>> +	pmu->sample[gt->info.id][__I915_SAMPLE_RC6].cur = __get_rc6(gt);
>>> 	pmu->sleep_last[gt->info.id] = ktime_get_raw();
>>>    }
>>>    @@ -428,6 +400,12 @@ engines_sample(struct intel_gt *gt, unsigned int
>>> period_ns)
>>> 	}
>>>    }
>>>    +static void
>>> +add_sample_mult(struct i915_pmu_sample *sample, u32 val, u32 mul)
>>> +{
>>> +	sample->cur += mul_u32_u32(val, mul);
>>> +}
>>> +
>>>    static bool
>>>    frequency_sampling_enabled(struct i915_pmu *pmu, unsigned int gt)
>>>    {
>>> @@ -467,12 +445,12 @@ frequency_sample(struct intel_gt *gt, unsigned int period_ns)
>>> 		if (!val)
>>> 			val = intel_gpu_freq(rps, rps->cur_freq);
>>>    -		add_sample_mult(pmu, gt_id, __I915_SAMPLE_FREQ_ACT,
>>> +		add_sample_mult(&pmu->sample[gt_id][__I915_SAMPLE_FREQ_ACT],
>>> 				val, period_ns / 1000);
>>> 	}
>>> 		if (pmu->enable &
>>> config_mask(__I915_PMU_REQUESTED_FREQUENCY(gt_id))) {
>>> -		add_sample_mult(pmu, gt_id, __I915_SAMPLE_FREQ_REQ,
>>> +		add_sample_mult(&pmu->sample[gt_id][__I915_SAMPLE_FREQ_REQ],
>>> 				intel_rps_get_requested_frequency(rps),
>>> 				period_ns / 1000);
>>> 	}
>>> @@ -673,14 +651,12 @@ static u64 __i915_pmu_event_read(struct perf_event *event)
>>> 		switch (config) {
>>> 		case I915_PMU_ACTUAL_FREQUENCY:
>>> 			val =
>>> -			   div_u64(read_sample(pmu, gt_id,
>>> -					       __I915_SAMPLE_FREQ_ACT),
>>> +			   div_u64(pmu->sample[gt_id][__I915_SAMPLE_FREQ_ACT].cur,
>>> 				   USEC_PER_SEC /* to MHz */);
>>> 			break;
>>> 		case I915_PMU_REQUESTED_FREQUENCY:
>>> 			val =
>>> -			   div_u64(read_sample(pmu, gt_id,
>>> -					       __I915_SAMPLE_FREQ_REQ),
>>> +			   div_u64(pmu->sample[gt_id][__I915_SAMPLE_FREQ_REQ].cur,
>>> 				   USEC_PER_SEC /* to MHz */);
>>> 			break;
>>> 		case I915_PMU_INTERRUPTS:
>>> diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h
>>> index 33d80fbaab8bc..d20592e7db999 100644
>>> --- a/drivers/gpu/drm/i915/i915_pmu.h
>>> +++ b/drivers/gpu/drm/i915/i915_pmu.h
>>> @@ -127,7 +127,7 @@ struct i915_pmu {
>>> 	 * Only global counters are held here, while the per-engine ones are in
>>> 	 * struct intel_engine_cs.
>>> 	 */
>>> -	struct i915_pmu_sample sample[I915_PMU_MAX_GTS * __I915_NUM_PMU_SAMPLERS];
>>> +	struct i915_pmu_sample sample[I915_PMU_MAX_GTS][__I915_NUM_PMU_SAMPLERS];
>>> 	/**
>>> 	 * @sleep_last: Last time GT parked for RC6 estimation.
>>> 	 */
Dixit, Ashutosh May 24, 2023, 9:46 p.m. UTC | #5
On Wed, 24 May 2023 10:53:20 -0700, Tvrtko Ursulin wrote:
>

Hi Tvrtko,

> On 24/05/2023 18:38, Dixit, Ashutosh wrote:
> > On Wed, 24 May 2023 04:38:18 -0700, Tvrtko Ursulin wrote:
> >> On 23/05/2023 16:19, Ashutosh Dixit wrote:
> >>> No functional changes but we can remove some unsightly index computation
> >>> and read/write functions if we convert the PMU sample array from a
> >>> one-dimensional to a two-dimensional array.
> >>>
> >>> Suggested-by: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
> >>> Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
> >>> ---
> >>>    drivers/gpu/drm/i915/i915_pmu.c | 60 ++++++++++-----------------------
> >>>    drivers/gpu/drm/i915/i915_pmu.h |  2 +-
> >>>    2 files changed, 19 insertions(+), 43 deletions(-)
> >>>
> >>> diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
> >>> index b47d890d4ada1..137e0df9573ee 100644
> >>> --- a/drivers/gpu/drm/i915/i915_pmu.c
> >>> +++ b/drivers/gpu/drm/i915/i915_pmu.c
> >>> @@ -195,33 +195,6 @@ static inline s64 ktime_since_raw(const ktime_t kt)
> >>>	return ktime_to_ns(ktime_sub(ktime_get_raw(), kt));
> >>>    }
> >>>    -static unsigned int
> >>> -__sample_idx(struct i915_pmu *pmu, unsigned int gt_id, int sample)
> >>> -{
> >>> -	unsigned int idx = gt_id * __I915_NUM_PMU_SAMPLERS + sample;
> >>> -
> >>> -	GEM_BUG_ON(idx >= ARRAY_SIZE(pmu->sample));
> >>> -
> >>> -	return idx;
> >>> -}
> >>> -
> >>> -static u64 read_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample)
> >>> -{
> >>> -	return pmu->sample[__sample_idx(pmu, gt_id, sample)].cur;
> >>> -}
> >>> -
> >>> -static void
> >>> -store_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample, u64 val)
> >>> -{
> >>> -	pmu->sample[__sample_idx(pmu, gt_id, sample)].cur = val;
> >>> -}
> >>> -
> >>> -static void
> >>> -add_sample_mult(struct i915_pmu *pmu, unsigned int gt_id, int sample, u32 val, u32 mul)
> >>> -{
> >>> -	pmu->sample[__sample_idx(pmu, gt_id, sample)].cur += mul_u32_u32(val, mul);
> >>> -}
> >>
> >> IMO read and store helpers could have stayed and just changed the
> >> implementation. Like add_sample_mult which you just moved. I would have
> >> been a smaller patch. So dunno.. a bit of a reluctant r-b.
> >
> > Are you referring just to add_sample_mult or to all the other functions
> > too? add_sample_mult I moved it to where it was before bc4be0a38b63
>
> Read and store helpers.
>
> > ("drm/i915/pmu: Prepare for multi-tile non-engine counters"), could have
> > left it here I guess.
> >
> > The other read and store helpers are not needed with the 2-d array at all
> > since the compiler itself will do that, so I thought it was better to get
> > rid of them completely.
>
> Yes I get it, just that I didn't see the benefit of removing them.
>
> For example:
>
>  -		store_sample(pmu, gt_id, __I915_SAMPLE_RC6, val);
>  +		pmu->sample[gt_id][__I915_SAMPLE_RC6].cur = val;
>
> It's a meh for me. Either flavour looks fine to me so I would have erred on
> the side of keeping the patch small. If anything I probably slightly prefer
> that the struct pmu_sample implementation was able to be changed with less
> churn before. For example. But a very minor argument really.

OK, I finally understood and have made this change in Patch v2. Please take
a look.

>
> Or maybe next step is get rid of the struct i915_pmu_sample. It is a struct
> because originally previous value was tracked too. Then I removed that and
> it was easier to keep the struct. I guess it can go now and then the
> removal of helpers here will look somewhat nicer without the trailing .cur
> on every affected line.

I have left this as is for now in case the i915_pmu_sample need to be
expanded again. Should be ok with the read/store helpers.

>
> > Let me know if you want any changes, otherwise I will leave as is.
>
> You can leave it as is, I dont' mind much.

I went ahead and changed it anyway since you seemed to want it.

Thanks.
--
Ashutosh

>
> >> Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> >
> > Thanks for the review. Thanks Andrzej too :)
> > --
> > Ashutosh
> >
> >>> -
> >>>    static u64 get_rc6(struct intel_gt *gt)
> >>>    {
> >>>	struct drm_i915_private *i915 = gt->i915;
> >>> @@ -240,7 +213,7 @@ static u64 get_rc6(struct intel_gt *gt)
> >>>	spin_lock_irqsave(&pmu->lock, flags);
> >>>		if (awake) {
> >>> -		store_sample(pmu, gt_id, __I915_SAMPLE_RC6, val);
> >>> +		pmu->sample[gt_id][__I915_SAMPLE_RC6].cur = val;
> >>>	} else {
> >>>		/*
> >>>		 * We think we are runtime suspended.
> >>> @@ -250,13 +223,13 @@ static u64 get_rc6(struct intel_gt *gt)
> >>>		 * counter value.
> >>>		 */
> >>>		val = ktime_since_raw(pmu->sleep_last[gt_id]);
> >>> -		val += read_sample(pmu, gt_id, __I915_SAMPLE_RC6);
> >>> +		val += pmu->sample[gt_id][__I915_SAMPLE_RC6].cur;
> >>>	}
> >>>    -	if (val < read_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED))
> >>> -		val = read_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED);
> >>> +	if (val < pmu->sample[gt_id][__I915_SAMPLE_RC6_LAST_REPORTED].cur)
> >>> +		val = pmu->sample[gt_id][__I915_SAMPLE_RC6_LAST_REPORTED].cur;
> >>>	else
> >>> -		store_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED, val);
> >>> +		pmu->sample[gt_id][__I915_SAMPLE_RC6_LAST_REPORTED].cur = val;
> >>>		spin_unlock_irqrestore(&pmu->lock, flags);
> >>>    @@ -275,9 +248,8 @@ static void init_rc6(struct i915_pmu *pmu)
> >>>		with_intel_runtime_pm(gt->uncore->rpm, wakeref) {
> >>>			u64 val = __get_rc6(gt);
> >>>    -			store_sample(pmu, i, __I915_SAMPLE_RC6, val);
> >>> -			store_sample(pmu, i, __I915_SAMPLE_RC6_LAST_REPORTED,
> >>> -				     val);
> >>> +			pmu->sample[i][__I915_SAMPLE_RC6].cur = val;
> >>> +			pmu->sample[i][__I915_SAMPLE_RC6_LAST_REPORTED].cur = val;
> >>>			pmu->sleep_last[i] = ktime_get_raw();
> >>>		}
> >>>	}
> >>> @@ -287,7 +259,7 @@ static void park_rc6(struct intel_gt *gt)
> >>>    {
> >>>	struct i915_pmu *pmu = &gt->i915->pmu;
> >>>    -	store_sample(pmu, gt->info.id, __I915_SAMPLE_RC6, __get_rc6(gt));
> >>> +	pmu->sample[gt->info.id][__I915_SAMPLE_RC6].cur = __get_rc6(gt);
> >>>	pmu->sleep_last[gt->info.id] = ktime_get_raw();
> >>>    }
> >>>    @@ -428,6 +400,12 @@ engines_sample(struct intel_gt *gt, unsigned int
> >>> period_ns)
> >>>	}
> >>>    }
> >>>    +static void
> >>> +add_sample_mult(struct i915_pmu_sample *sample, u32 val, u32 mul)
> >>> +{
> >>> +	sample->cur += mul_u32_u32(val, mul);
> >>> +}
> >>> +
> >>>    static bool
> >>>    frequency_sampling_enabled(struct i915_pmu *pmu, unsigned int gt)
> >>>    {
> >>> @@ -467,12 +445,12 @@ frequency_sample(struct intel_gt *gt, unsigned int period_ns)
> >>>		if (!val)
> >>>			val = intel_gpu_freq(rps, rps->cur_freq);
> >>>    -		add_sample_mult(pmu, gt_id, __I915_SAMPLE_FREQ_ACT,
> >>> +		add_sample_mult(&pmu->sample[gt_id][__I915_SAMPLE_FREQ_ACT],
> >>>				val, period_ns / 1000);
> >>>	}
> >>>		if (pmu->enable &
> >>> config_mask(__I915_PMU_REQUESTED_FREQUENCY(gt_id))) {
> >>> -		add_sample_mult(pmu, gt_id, __I915_SAMPLE_FREQ_REQ,
> >>> +		add_sample_mult(&pmu->sample[gt_id][__I915_SAMPLE_FREQ_REQ],
> >>>				intel_rps_get_requested_frequency(rps),
> >>>				period_ns / 1000);
> >>>	}
> >>> @@ -673,14 +651,12 @@ static u64 __i915_pmu_event_read(struct perf_event *event)
> >>>		switch (config) {
> >>>		case I915_PMU_ACTUAL_FREQUENCY:
> >>>			val =
> >>> -			   div_u64(read_sample(pmu, gt_id,
> >>> -					       __I915_SAMPLE_FREQ_ACT),
> >>> +			   div_u64(pmu->sample[gt_id][__I915_SAMPLE_FREQ_ACT].cur,
> >>>				   USEC_PER_SEC /* to MHz */);
> >>>			break;
> >>>		case I915_PMU_REQUESTED_FREQUENCY:
> >>>			val =
> >>> -			   div_u64(read_sample(pmu, gt_id,
> >>> -					       __I915_SAMPLE_FREQ_REQ),
> >>> +			   div_u64(pmu->sample[gt_id][__I915_SAMPLE_FREQ_REQ].cur,
> >>>				   USEC_PER_SEC /* to MHz */);
> >>>			break;
> >>>		case I915_PMU_INTERRUPTS:
> >>> diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h
> >>> index 33d80fbaab8bc..d20592e7db999 100644
> >>> --- a/drivers/gpu/drm/i915/i915_pmu.h
> >>> +++ b/drivers/gpu/drm/i915/i915_pmu.h
> >>> @@ -127,7 +127,7 @@ struct i915_pmu {
> >>>	 * Only global counters are held here, while the per-engine ones are in
> >>>	 * struct intel_engine_cs.
> >>>	 */
> >>> -	struct i915_pmu_sample sample[I915_PMU_MAX_GTS * __I915_NUM_PMU_SAMPLERS];
> >>> +	struct i915_pmu_sample sample[I915_PMU_MAX_GTS][__I915_NUM_PMU_SAMPLERS];
> >>>	/**
> >>>	 * @sleep_last: Last time GT parked for RC6 estimation.
> >>>	 */
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index b47d890d4ada1..137e0df9573ee 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -195,33 +195,6 @@  static inline s64 ktime_since_raw(const ktime_t kt)
 	return ktime_to_ns(ktime_sub(ktime_get_raw(), kt));
 }
 
-static unsigned int
-__sample_idx(struct i915_pmu *pmu, unsigned int gt_id, int sample)
-{
-	unsigned int idx = gt_id * __I915_NUM_PMU_SAMPLERS + sample;
-
-	GEM_BUG_ON(idx >= ARRAY_SIZE(pmu->sample));
-
-	return idx;
-}
-
-static u64 read_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample)
-{
-	return pmu->sample[__sample_idx(pmu, gt_id, sample)].cur;
-}
-
-static void
-store_sample(struct i915_pmu *pmu, unsigned int gt_id, int sample, u64 val)
-{
-	pmu->sample[__sample_idx(pmu, gt_id, sample)].cur = val;
-}
-
-static void
-add_sample_mult(struct i915_pmu *pmu, unsigned int gt_id, int sample, u32 val, u32 mul)
-{
-	pmu->sample[__sample_idx(pmu, gt_id, sample)].cur += mul_u32_u32(val, mul);
-}
-
 static u64 get_rc6(struct intel_gt *gt)
 {
 	struct drm_i915_private *i915 = gt->i915;
@@ -240,7 +213,7 @@  static u64 get_rc6(struct intel_gt *gt)
 	spin_lock_irqsave(&pmu->lock, flags);
 
 	if (awake) {
-		store_sample(pmu, gt_id, __I915_SAMPLE_RC6, val);
+		pmu->sample[gt_id][__I915_SAMPLE_RC6].cur = val;
 	} else {
 		/*
 		 * We think we are runtime suspended.
@@ -250,13 +223,13 @@  static u64 get_rc6(struct intel_gt *gt)
 		 * counter value.
 		 */
 		val = ktime_since_raw(pmu->sleep_last[gt_id]);
-		val += read_sample(pmu, gt_id, __I915_SAMPLE_RC6);
+		val += pmu->sample[gt_id][__I915_SAMPLE_RC6].cur;
 	}
 
-	if (val < read_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED))
-		val = read_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED);
+	if (val < pmu->sample[gt_id][__I915_SAMPLE_RC6_LAST_REPORTED].cur)
+		val = pmu->sample[gt_id][__I915_SAMPLE_RC6_LAST_REPORTED].cur;
 	else
-		store_sample(pmu, gt_id, __I915_SAMPLE_RC6_LAST_REPORTED, val);
+		pmu->sample[gt_id][__I915_SAMPLE_RC6_LAST_REPORTED].cur = val;
 
 	spin_unlock_irqrestore(&pmu->lock, flags);
 
@@ -275,9 +248,8 @@  static void init_rc6(struct i915_pmu *pmu)
 		with_intel_runtime_pm(gt->uncore->rpm, wakeref) {
 			u64 val = __get_rc6(gt);
 
-			store_sample(pmu, i, __I915_SAMPLE_RC6, val);
-			store_sample(pmu, i, __I915_SAMPLE_RC6_LAST_REPORTED,
-				     val);
+			pmu->sample[i][__I915_SAMPLE_RC6].cur = val;
+			pmu->sample[i][__I915_SAMPLE_RC6_LAST_REPORTED].cur = val;
 			pmu->sleep_last[i] = ktime_get_raw();
 		}
 	}
@@ -287,7 +259,7 @@  static void park_rc6(struct intel_gt *gt)
 {
 	struct i915_pmu *pmu = &gt->i915->pmu;
 
-	store_sample(pmu, gt->info.id, __I915_SAMPLE_RC6, __get_rc6(gt));
+	pmu->sample[gt->info.id][__I915_SAMPLE_RC6].cur = __get_rc6(gt);
 	pmu->sleep_last[gt->info.id] = ktime_get_raw();
 }
 
@@ -428,6 +400,12 @@  engines_sample(struct intel_gt *gt, unsigned int period_ns)
 	}
 }
 
+static void
+add_sample_mult(struct i915_pmu_sample *sample, u32 val, u32 mul)
+{
+	sample->cur += mul_u32_u32(val, mul);
+}
+
 static bool
 frequency_sampling_enabled(struct i915_pmu *pmu, unsigned int gt)
 {
@@ -467,12 +445,12 @@  frequency_sample(struct intel_gt *gt, unsigned int period_ns)
 		if (!val)
 			val = intel_gpu_freq(rps, rps->cur_freq);
 
-		add_sample_mult(pmu, gt_id, __I915_SAMPLE_FREQ_ACT,
+		add_sample_mult(&pmu->sample[gt_id][__I915_SAMPLE_FREQ_ACT],
 				val, period_ns / 1000);
 	}
 
 	if (pmu->enable & config_mask(__I915_PMU_REQUESTED_FREQUENCY(gt_id))) {
-		add_sample_mult(pmu, gt_id, __I915_SAMPLE_FREQ_REQ,
+		add_sample_mult(&pmu->sample[gt_id][__I915_SAMPLE_FREQ_REQ],
 				intel_rps_get_requested_frequency(rps),
 				period_ns / 1000);
 	}
@@ -673,14 +651,12 @@  static u64 __i915_pmu_event_read(struct perf_event *event)
 		switch (config) {
 		case I915_PMU_ACTUAL_FREQUENCY:
 			val =
-			   div_u64(read_sample(pmu, gt_id,
-					       __I915_SAMPLE_FREQ_ACT),
+			   div_u64(pmu->sample[gt_id][__I915_SAMPLE_FREQ_ACT].cur,
 				   USEC_PER_SEC /* to MHz */);
 			break;
 		case I915_PMU_REQUESTED_FREQUENCY:
 			val =
-			   div_u64(read_sample(pmu, gt_id,
-					       __I915_SAMPLE_FREQ_REQ),
+			   div_u64(pmu->sample[gt_id][__I915_SAMPLE_FREQ_REQ].cur,
 				   USEC_PER_SEC /* to MHz */);
 			break;
 		case I915_PMU_INTERRUPTS:
diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h
index 33d80fbaab8bc..d20592e7db999 100644
--- a/drivers/gpu/drm/i915/i915_pmu.h
+++ b/drivers/gpu/drm/i915/i915_pmu.h
@@ -127,7 +127,7 @@  struct i915_pmu {
 	 * Only global counters are held here, while the per-engine ones are in
 	 * struct intel_engine_cs.
 	 */
-	struct i915_pmu_sample sample[I915_PMU_MAX_GTS * __I915_NUM_PMU_SAMPLERS];
+	struct i915_pmu_sample sample[I915_PMU_MAX_GTS][__I915_NUM_PMU_SAMPLERS];
 	/**
 	 * @sleep_last: Last time GT parked for RC6 estimation.
 	 */