diff mbox

[v3,6/7] drm/i915/perf: per-gen timebase for checking sample freq

Message ID 20170405162320.30094-7-robert@sixbynine.org (mailing list archive)
State New, archived
Headers show

Commit Message

Robert Bragg April 5, 2017, 4:23 p.m. UTC
An oa_exponent_to_ns() utility and per-gen timebase constants where
recently removed when updating the tail pointer race condition WA, and
this restores those so we can update the _PROP_OA_EXPONENT validation
done in read_properties_unlocked() to not assume we have a 12.5KHz
timebase as we did for Haswell.

Signed-off-by: Robert Bragg <robert@sixbynine.org>
Cc: Lionel Landwerlin <lionel.g.landwerlin@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h  |  1 +
 drivers/gpu/drm/i915/i915_perf.c | 21 +++++++++++++++------
 2 files changed, 16 insertions(+), 6 deletions(-)

Comments

Lionel Landwerlin April 5, 2017, 4:49 p.m. UTC | #1
Hey Rob,

Thanks for sending this, it looks good to me.
I think we also need to update the oa_sample_rate_hard_limit & 
i915_oa_max_sample_rate variables.

This patch is :

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>


On 05/04/17 17:23, Robert Bragg wrote:
> An oa_exponent_to_ns() utility and per-gen timebase constants where
> recently removed when updating the tail pointer race condition WA, and
> this restores those so we can update the _PROP_OA_EXPONENT validation
> done in read_properties_unlocked() to not assume we have a 12.5KHz
> timebase as we did for Haswell.
>
> Signed-off-by: Robert Bragg <robert@sixbynine.org>
> Cc: Lionel Landwerlin <lionel.g.landwerlin@linux.intel.com>
> ---
>   drivers/gpu/drm/i915/i915_drv.h  |  1 +
>   drivers/gpu/drm/i915/i915_perf.c | 21 +++++++++++++++------
>   2 files changed, 16 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 3a22b6fd0ee6..48b07d706f06 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2463,6 +2463,7 @@ struct drm_i915_private {
>   
>   			bool periodic;
>   			int period_exponent;
> +			int timestamp_frequency;
>   
>   			int metrics_set;
>   
> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
> index 98eb6415b63a..87c0d1ce1b9f 100644
> --- a/drivers/gpu/drm/i915/i915_perf.c
> +++ b/drivers/gpu/drm/i915/i915_perf.c
> @@ -2549,6 +2549,12 @@ i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv,
>   	return ret;
>   }
>   
> +static u64 oa_exponent_to_ns(struct drm_i915_private *dev_priv, int exponent)
> +{
> +       return div_u64(1000000000ULL * (2ULL << exponent),
> +                      dev_priv->perf.oa.timestamp_frequency);
> +}
> +
>   /**
>    * read_properties_unlocked - validate + copy userspace stream open properties
>    * @dev_priv: i915 device instance
> @@ -2647,14 +2653,9 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv,
>   			/* Theoretically we can program the OA unit to sample
>   			 * every 160ns but don't allow that by default unless
>   			 * root.
> -			 *
> -			 * On Haswell the period is derived from the exponent
> -			 * as:
> -			 *
> -			 *   period = 80ns * 2^(exponent + 1)
>   			 */
>   			BUILD_BUG_ON(sizeof(oa_period) != 8);
> -			oa_period = 80ull * (2ull << value);
> +			oa_period = oa_exponent_to_ns(dev_priv, value);
>   
>   			/* This check is primarily to ensure that oa_period <=
>   			 * UINT32_MAX (before passing to do_div which only
> @@ -2910,6 +2911,8 @@ void i915_perf_init(struct drm_i915_private *dev_priv)
>   		dev_priv->perf.oa.ops.oa_hw_tail_read =
>   			gen7_oa_hw_tail_read;
>   
> +		dev_priv->perf.oa.timestamp_frequency = 12500000;
> +
>   		dev_priv->perf.oa.oa_formats = hsw_oa_formats;
>   
>   		dev_priv->perf.oa.n_builtin_sets =
> @@ -2923,6 +2926,8 @@ void i915_perf_init(struct drm_i915_private *dev_priv)
>   		 */
>   
>   		if (IS_GEN8(dev_priv)) {
> +			dev_priv->perf.oa.timestamp_frequency = 12500000;
> +
>   			dev_priv->perf.oa.ctx_oactxctrl_offset = 0x120;
>   			dev_priv->perf.oa.ctx_flexeu0_offset = 0x2ce;
>   			dev_priv->perf.oa.gen8_valid_ctx_bit = (1<<25);
> @@ -2939,6 +2944,8 @@ void i915_perf_init(struct drm_i915_private *dev_priv)
>   					i915_oa_select_metric_set_chv;
>   			}
>   		} else if (IS_GEN9(dev_priv)) {
> +			dev_priv->perf.oa.timestamp_frequency = 12000000;
> +
>   			dev_priv->perf.oa.ctx_oactxctrl_offset = 0x128;
>   			dev_priv->perf.oa.ctx_flexeu0_offset = 0x3de;
>   			dev_priv->perf.oa.gen8_valid_ctx_bit = (1<<16);
> @@ -2959,6 +2966,8 @@ void i915_perf_init(struct drm_i915_private *dev_priv)
>   				dev_priv->perf.oa.ops.select_metric_set =
>   					i915_oa_select_metric_set_sklgt4;
>   			} else if (IS_BROXTON(dev_priv)) {
> +				dev_priv->perf.oa.timestamp_frequency = 19200123;
> +
>   				dev_priv->perf.oa.n_builtin_sets =
>   					i915_oa_n_builtin_metric_sets_bxt;
>   				dev_priv->perf.oa.ops.select_metric_set =
Ville Syrjälä April 5, 2017, 5:06 p.m. UTC | #2
On Wed, Apr 05, 2017 at 05:23:19PM +0100, Robert Bragg wrote:
> An oa_exponent_to_ns() utility and per-gen timebase constants where
> recently removed when updating the tail pointer race condition WA, and
> this restores those so we can update the _PROP_OA_EXPONENT validation
> done in read_properties_unlocked() to not assume we have a 12.5KHz
> timebase as we did for Haswell.
> 
> Signed-off-by: Robert Bragg <robert@sixbynine.org>
> Cc: Lionel Landwerlin <lionel.g.landwerlin@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/i915_drv.h  |  1 +
>  drivers/gpu/drm/i915/i915_perf.c | 21 +++++++++++++++------
>  2 files changed, 16 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 3a22b6fd0ee6..48b07d706f06 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2463,6 +2463,7 @@ struct drm_i915_private {
>  
>  			bool periodic;
>  			int period_exponent;
> +			int timestamp_frequency;
>  
>  			int metrics_set;
>  
> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
> index 98eb6415b63a..87c0d1ce1b9f 100644
> --- a/drivers/gpu/drm/i915/i915_perf.c
> +++ b/drivers/gpu/drm/i915/i915_perf.c
> @@ -2549,6 +2549,12 @@ i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv,
>  	return ret;
>  }
>  
> +static u64 oa_exponent_to_ns(struct drm_i915_private *dev_priv, int exponent)
> +{
> +       return div_u64(1000000000ULL * (2ULL << exponent),
> +                      dev_priv->perf.oa.timestamp_frequency);
> +}
> +
>  /**
>   * read_properties_unlocked - validate + copy userspace stream open properties
>   * @dev_priv: i915 device instance
> @@ -2647,14 +2653,9 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv,
>  			/* Theoretically we can program the OA unit to sample
>  			 * every 160ns but don't allow that by default unless
>  			 * root.
> -			 *
> -			 * On Haswell the period is derived from the exponent
> -			 * as:
> -			 *
> -			 *   period = 80ns * 2^(exponent + 1)
>  			 */
>  			BUILD_BUG_ON(sizeof(oa_period) != 8);
> -			oa_period = 80ull * (2ull << value);
> +			oa_period = oa_exponent_to_ns(dev_priv, value);
>  
>  			/* This check is primarily to ensure that oa_period <=
>  			 * UINT32_MAX (before passing to do_div which only
> @@ -2910,6 +2911,8 @@ void i915_perf_init(struct drm_i915_private *dev_priv)
>  		dev_priv->perf.oa.ops.oa_hw_tail_read =
>  			gen7_oa_hw_tail_read;
>  
> +		dev_priv->perf.oa.timestamp_frequency = 12500000;
> +
>  		dev_priv->perf.oa.oa_formats = hsw_oa_formats;
>  
>  		dev_priv->perf.oa.n_builtin_sets =
> @@ -2923,6 +2926,8 @@ void i915_perf_init(struct drm_i915_private *dev_priv)
>  		 */
>  
>  		if (IS_GEN8(dev_priv)) {
> +			dev_priv->perf.oa.timestamp_frequency = 12500000;
> +
>  			dev_priv->perf.oa.ctx_oactxctrl_offset = 0x120;
>  			dev_priv->perf.oa.ctx_flexeu0_offset = 0x2ce;
>  			dev_priv->perf.oa.gen8_valid_ctx_bit = (1<<25);
> @@ -2939,6 +2944,8 @@ void i915_perf_init(struct drm_i915_private *dev_priv)
>  					i915_oa_select_metric_set_chv;
>  			}
>  		} else if (IS_GEN9(dev_priv)) {
> +			dev_priv->perf.oa.timestamp_frequency = 12000000;
> +
>  			dev_priv->perf.oa.ctx_oactxctrl_offset = 0x128;
>  			dev_priv->perf.oa.ctx_flexeu0_offset = 0x3de;
>  			dev_priv->perf.oa.gen8_valid_ctx_bit = (1<<16);
> @@ -2959,6 +2966,8 @@ void i915_perf_init(struct drm_i915_private *dev_priv)
>  				dev_priv->perf.oa.ops.select_metric_set =
>  					i915_oa_select_metric_set_sklgt4;
>  			} else if (IS_BROXTON(dev_priv)) {
> +				dev_priv->perf.oa.timestamp_frequency = 19200123;
> +

Why isn't this exactly 19.2MHz?

>  				dev_priv->perf.oa.n_builtin_sets =
>  					i915_oa_n_builtin_metric_sets_bxt;
>  				dev_priv->perf.oa.ops.select_metric_set =
> -- 
> 2.12.0
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Lionel Landwerlin April 5, 2017, 5:17 p.m. UTC | #3
On 05/04/17 18:06, Ville Syrjälä wrote:
> On Wed, Apr 05, 2017 at 05:23:19PM +0100, Robert Bragg wrote:
>> An oa_exponent_to_ns() utility and per-gen timebase constants where
>> recently removed when updating the tail pointer race condition WA, and
>> this restores those so we can update the _PROP_OA_EXPONENT validation
>> done in read_properties_unlocked() to not assume we have a 12.5KHz
>> timebase as we did for Haswell.
>>
>> Signed-off-by: Robert Bragg <robert@sixbynine.org>
>> Cc: Lionel Landwerlin <lionel.g.landwerlin@linux.intel.com>
>> ---
>>   drivers/gpu/drm/i915/i915_drv.h  |  1 +
>>   drivers/gpu/drm/i915/i915_perf.c | 21 +++++++++++++++------
>>   2 files changed, 16 insertions(+), 6 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
>> index 3a22b6fd0ee6..48b07d706f06 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.h
>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>> @@ -2463,6 +2463,7 @@ struct drm_i915_private {
>>   
>>   			bool periodic;
>>   			int period_exponent;
>> +			int timestamp_frequency;
>>   
>>   			int metrics_set;
>>   
>> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
>> index 98eb6415b63a..87c0d1ce1b9f 100644
>> --- a/drivers/gpu/drm/i915/i915_perf.c
>> +++ b/drivers/gpu/drm/i915/i915_perf.c
>> @@ -2549,6 +2549,12 @@ i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv,
>>   	return ret;
>>   }
>>   
>> +static u64 oa_exponent_to_ns(struct drm_i915_private *dev_priv, int exponent)
>> +{
>> +       return div_u64(1000000000ULL * (2ULL << exponent),
>> +                      dev_priv->perf.oa.timestamp_frequency);
>> +}
>> +
>>   /**
>>    * read_properties_unlocked - validate + copy userspace stream open properties
>>    * @dev_priv: i915 device instance
>> @@ -2647,14 +2653,9 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv,
>>   			/* Theoretically we can program the OA unit to sample
>>   			 * every 160ns but don't allow that by default unless
>>   			 * root.
>> -			 *
>> -			 * On Haswell the period is derived from the exponent
>> -			 * as:
>> -			 *
>> -			 *   period = 80ns * 2^(exponent + 1)
>>   			 */
>>   			BUILD_BUG_ON(sizeof(oa_period) != 8);
>> -			oa_period = 80ull * (2ull << value);
>> +			oa_period = oa_exponent_to_ns(dev_priv, value);
>>   
>>   			/* This check is primarily to ensure that oa_period <=
>>   			 * UINT32_MAX (before passing to do_div which only
>> @@ -2910,6 +2911,8 @@ void i915_perf_init(struct drm_i915_private *dev_priv)
>>   		dev_priv->perf.oa.ops.oa_hw_tail_read =
>>   			gen7_oa_hw_tail_read;
>>   
>> +		dev_priv->perf.oa.timestamp_frequency = 12500000;
>> +
>>   		dev_priv->perf.oa.oa_formats = hsw_oa_formats;
>>   
>>   		dev_priv->perf.oa.n_builtin_sets =
>> @@ -2923,6 +2926,8 @@ void i915_perf_init(struct drm_i915_private *dev_priv)
>>   		 */
>>   
>>   		if (IS_GEN8(dev_priv)) {
>> +			dev_priv->perf.oa.timestamp_frequency = 12500000;
>> +
>>   			dev_priv->perf.oa.ctx_oactxctrl_offset = 0x120;
>>   			dev_priv->perf.oa.ctx_flexeu0_offset = 0x2ce;
>>   			dev_priv->perf.oa.gen8_valid_ctx_bit = (1<<25);
>> @@ -2939,6 +2944,8 @@ void i915_perf_init(struct drm_i915_private *dev_priv)
>>   					i915_oa_select_metric_set_chv;
>>   			}
>>   		} else if (IS_GEN9(dev_priv)) {
>> +			dev_priv->perf.oa.timestamp_frequency = 12000000;
>> +
>>   			dev_priv->perf.oa.ctx_oactxctrl_offset = 0x128;
>>   			dev_priv->perf.oa.ctx_flexeu0_offset = 0x3de;
>>   			dev_priv->perf.oa.gen8_valid_ctx_bit = (1<<16);
>> @@ -2959,6 +2966,8 @@ void i915_perf_init(struct drm_i915_private *dev_priv)
>>   				dev_priv->perf.oa.ops.select_metric_set =
>>   					i915_oa_select_metric_set_sklgt4;
>>   			} else if (IS_BROXTON(dev_priv)) {
>> +				dev_priv->perf.oa.timestamp_frequency = 19200123;
>> +
> Why isn't this exactly 19.2MHz?

It's based off the timestamp base (from documentation) :

BDW: 80ns
SKL: 83.333ns
BXT: 52.083ns

1000000000 / 19200123 is the closest we can get.

>
>>   				dev_priv->perf.oa.n_builtin_sets =
>>   					i915_oa_n_builtin_metric_sets_bxt;
>>   				dev_priv->perf.oa.ops.select_metric_set =
>> -- 
>> 2.12.0
>>
>> _______________________________________________
>> Intel-gfx mailing list
>> Intel-gfx@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Ville Syrjälä April 5, 2017, 5:26 p.m. UTC | #4
On Wed, Apr 05, 2017 at 06:17:36PM +0100, Lionel Landwerlin wrote:
> On 05/04/17 18:06, Ville Syrjälä wrote:
> > On Wed, Apr 05, 2017 at 05:23:19PM +0100, Robert Bragg wrote:
> >> An oa_exponent_to_ns() utility and per-gen timebase constants where
> >> recently removed when updating the tail pointer race condition WA, and
> >> this restores those so we can update the _PROP_OA_EXPONENT validation
> >> done in read_properties_unlocked() to not assume we have a 12.5KHz
> >> timebase as we did for Haswell.
> >>
> >> Signed-off-by: Robert Bragg <robert@sixbynine.org>
> >> Cc: Lionel Landwerlin <lionel.g.landwerlin@linux.intel.com>
> >> ---
> >>   drivers/gpu/drm/i915/i915_drv.h  |  1 +
> >>   drivers/gpu/drm/i915/i915_perf.c | 21 +++++++++++++++------
> >>   2 files changed, 16 insertions(+), 6 deletions(-)
> >>
> >> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> >> index 3a22b6fd0ee6..48b07d706f06 100644
> >> --- a/drivers/gpu/drm/i915/i915_drv.h
> >> +++ b/drivers/gpu/drm/i915/i915_drv.h
> >> @@ -2463,6 +2463,7 @@ struct drm_i915_private {
> >>   
> >>   			bool periodic;
> >>   			int period_exponent;
> >> +			int timestamp_frequency;
> >>   
> >>   			int metrics_set;
> >>   
> >> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
> >> index 98eb6415b63a..87c0d1ce1b9f 100644
> >> --- a/drivers/gpu/drm/i915/i915_perf.c
> >> +++ b/drivers/gpu/drm/i915/i915_perf.c
> >> @@ -2549,6 +2549,12 @@ i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv,
> >>   	return ret;
> >>   }
> >>   
> >> +static u64 oa_exponent_to_ns(struct drm_i915_private *dev_priv, int exponent)
> >> +{
> >> +       return div_u64(1000000000ULL * (2ULL << exponent),
> >> +                      dev_priv->perf.oa.timestamp_frequency);
> >> +}
> >> +
> >>   /**
> >>    * read_properties_unlocked - validate + copy userspace stream open properties
> >>    * @dev_priv: i915 device instance
> >> @@ -2647,14 +2653,9 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv,
> >>   			/* Theoretically we can program the OA unit to sample
> >>   			 * every 160ns but don't allow that by default unless
> >>   			 * root.
> >> -			 *
> >> -			 * On Haswell the period is derived from the exponent
> >> -			 * as:
> >> -			 *
> >> -			 *   period = 80ns * 2^(exponent + 1)
> >>   			 */
> >>   			BUILD_BUG_ON(sizeof(oa_period) != 8);
> >> -			oa_period = 80ull * (2ull << value);
> >> +			oa_period = oa_exponent_to_ns(dev_priv, value);
> >>   
> >>   			/* This check is primarily to ensure that oa_period <=
> >>   			 * UINT32_MAX (before passing to do_div which only
> >> @@ -2910,6 +2911,8 @@ void i915_perf_init(struct drm_i915_private *dev_priv)
> >>   		dev_priv->perf.oa.ops.oa_hw_tail_read =
> >>   			gen7_oa_hw_tail_read;
> >>   
> >> +		dev_priv->perf.oa.timestamp_frequency = 12500000;
> >> +
> >>   		dev_priv->perf.oa.oa_formats = hsw_oa_formats;
> >>   
> >>   		dev_priv->perf.oa.n_builtin_sets =
> >> @@ -2923,6 +2926,8 @@ void i915_perf_init(struct drm_i915_private *dev_priv)
> >>   		 */
> >>   
> >>   		if (IS_GEN8(dev_priv)) {
> >> +			dev_priv->perf.oa.timestamp_frequency = 12500000;
> >> +
> >>   			dev_priv->perf.oa.ctx_oactxctrl_offset = 0x120;
> >>   			dev_priv->perf.oa.ctx_flexeu0_offset = 0x2ce;
> >>   			dev_priv->perf.oa.gen8_valid_ctx_bit = (1<<25);
> >> @@ -2939,6 +2944,8 @@ void i915_perf_init(struct drm_i915_private *dev_priv)
> >>   					i915_oa_select_metric_set_chv;
> >>   			}
> >>   		} else if (IS_GEN9(dev_priv)) {
> >> +			dev_priv->perf.oa.timestamp_frequency = 12000000;
> >> +
> >>   			dev_priv->perf.oa.ctx_oactxctrl_offset = 0x128;
> >>   			dev_priv->perf.oa.ctx_flexeu0_offset = 0x3de;
> >>   			dev_priv->perf.oa.gen8_valid_ctx_bit = (1<<16);
> >> @@ -2959,6 +2966,8 @@ void i915_perf_init(struct drm_i915_private *dev_priv)
> >>   				dev_priv->perf.oa.ops.select_metric_set =
> >>   					i915_oa_select_metric_set_sklgt4;
> >>   			} else if (IS_BROXTON(dev_priv)) {
> >> +				dev_priv->perf.oa.timestamp_frequency = 19200123;
> >> +
> > Why isn't this exactly 19.2MHz?
> 
> It's based off the timestamp base (from documentation) :
> 
> BDW: 80ns
> SKL: 83.333ns
> BXT: 52.083ns
> 
> 1000000000 / 19200123 is the closest we can get.

I'm pretty sure the clock is actually 19.2 and the 52.083 is just a
truncated value. Same for the 83.333 where the code does specify
120MHz exactly.

> 
> >
> >>   				dev_priv->perf.oa.n_builtin_sets =
> >>   					i915_oa_n_builtin_metric_sets_bxt;
> >>   				dev_priv->perf.oa.ops.select_metric_set =
> >> -- 
> >> 2.12.0
> >>
> >> _______________________________________________
> >> Intel-gfx mailing list
> >> Intel-gfx@lists.freedesktop.org
> >> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
>
Robert Bragg April 5, 2017, 5:59 p.m. UTC | #5
On Wed, Apr 5, 2017 at 6:26 PM, Ville Syrjälä <ville.syrjala@linux.intel.com
> wrote:

> On Wed, Apr 05, 2017 at 06:17:36PM +0100, Lionel Landwerlin wrote:
> > On 05/04/17 18:06, Ville Syrjälä wrote:
> > > On Wed, Apr 05, 2017 at 05:23:19PM +0100, Robert Bragg wrote:
> > >> An oa_exponent_to_ns() utility and per-gen timebase constants where
> > >> recently removed when updating the tail pointer race condition WA, and
> > >> this restores those so we can update the _PROP_OA_EXPONENT validation
> > >> done in read_properties_unlocked() to not assume we have a 12.5KHz
> > >> timebase as we did for Haswell.
> > >>
> > >> Signed-off-by: Robert Bragg <robert@sixbynine.org>
> > >> Cc: Lionel Landwerlin <lionel.g.landwerlin@linux.intel.com>
> > >> ---
> > >>   drivers/gpu/drm/i915/i915_drv.h  |  1 +
> > >>   drivers/gpu/drm/i915/i915_perf.c | 21 +++++++++++++++------
> > >>   2 files changed, 16 insertions(+), 6 deletions(-)
> > >>
> > >> diff --git a/drivers/gpu/drm/i915/i915_drv.h
> b/drivers/gpu/drm/i915/i915_drv.h
> > >> index 3a22b6fd0ee6..48b07d706f06 100644
> > >> --- a/drivers/gpu/drm/i915/i915_drv.h
> > >> +++ b/drivers/gpu/drm/i915/i915_drv.h
> > >> @@ -2463,6 +2463,7 @@ struct drm_i915_private {
> > >>
> > >>                    bool periodic;
> > >>                    int period_exponent;
> > >> +                  int timestamp_frequency;
> > >>
> > >>                    int metrics_set;
> > >>
> > >> diff --git a/drivers/gpu/drm/i915/i915_perf.c
> b/drivers/gpu/drm/i915/i915_perf.c
> > >> index 98eb6415b63a..87c0d1ce1b9f 100644
> > >> --- a/drivers/gpu/drm/i915/i915_perf.c
> > >> +++ b/drivers/gpu/drm/i915/i915_perf.c
> > >> @@ -2549,6 +2549,12 @@ i915_perf_open_ioctl_locked(struct
> drm_i915_private *dev_priv,
> > >>    return ret;
> > >>   }
> > >>
> > >> +static u64 oa_exponent_to_ns(struct drm_i915_private *dev_priv, int
> exponent)
> > >> +{
> > >> +       return div_u64(1000000000ULL * (2ULL << exponent),
> > >> +                      dev_priv->perf.oa.timestamp_frequency);
> > >> +}
> > >> +
> > >>   /**
> > >>    * read_properties_unlocked - validate + copy userspace stream open
> properties
> > >>    * @dev_priv: i915 device instance
> > >> @@ -2647,14 +2653,9 @@ static int read_properties_unlocked(struct
> drm_i915_private *dev_priv,
> > >>                    /* Theoretically we can program the OA unit to
> sample
> > >>                     * every 160ns but don't allow that by default
> unless
> > >>                     * root.
> > >> -                   *
> > >> -                   * On Haswell the period is derived from the
> exponent
> > >> -                   * as:
> > >> -                   *
> > >> -                   *   period = 80ns * 2^(exponent + 1)
> > >>                     */
> > >>                    BUILD_BUG_ON(sizeof(oa_period) != 8);
> > >> -                  oa_period = 80ull * (2ull << value);
> > >> +                  oa_period = oa_exponent_to_ns(dev_priv, value);
> > >>
> > >>                    /* This check is primarily to ensure that
> oa_period <=
> > >>                     * UINT32_MAX (before passing to do_div which only
> > >> @@ -2910,6 +2911,8 @@ void i915_perf_init(struct drm_i915_private
> *dev_priv)
> > >>            dev_priv->perf.oa.ops.oa_hw_tail_read =
> > >>                    gen7_oa_hw_tail_read;
> > >>
> > >> +          dev_priv->perf.oa.timestamp_frequency = 12500000;
> > >> +
> > >>            dev_priv->perf.oa.oa_formats = hsw_oa_formats;
> > >>
> > >>            dev_priv->perf.oa.n_builtin_sets =
> > >> @@ -2923,6 +2926,8 @@ void i915_perf_init(struct drm_i915_private
> *dev_priv)
> > >>             */
> > >>
> > >>            if (IS_GEN8(dev_priv)) {
> > >> +                  dev_priv->perf.oa.timestamp_frequency = 12500000;
> > >> +
> > >>                    dev_priv->perf.oa.ctx_oactxctrl_offset = 0x120;
> > >>                    dev_priv->perf.oa.ctx_flexeu0_offset = 0x2ce;
> > >>                    dev_priv->perf.oa.gen8_valid_ctx_bit = (1<<25);
> > >> @@ -2939,6 +2944,8 @@ void i915_perf_init(struct drm_i915_private
> *dev_priv)
> > >>                                    i915_oa_select_metric_set_chv;
> > >>                    }
> > >>            } else if (IS_GEN9(dev_priv)) {
> > >> +                  dev_priv->perf.oa.timestamp_frequency = 12000000;
> > >> +
> > >>                    dev_priv->perf.oa.ctx_oactxctrl_offset = 0x128;
> > >>                    dev_priv->perf.oa.ctx_flexeu0_offset = 0x3de;
> > >>                    dev_priv->perf.oa.gen8_valid_ctx_bit = (1<<16);
> > >> @@ -2959,6 +2966,8 @@ void i915_perf_init(struct drm_i915_private
> *dev_priv)
> > >>                            dev_priv->perf.oa.ops.select_metric_set =
> > >>                                    i915_oa_select_metric_set_sklgt4;
> > >>                    } else if (IS_BROXTON(dev_priv)) {
> > >> +                          dev_priv->perf.oa.timestamp_frequency =
> 19200123;
> > >> +
> > > Why isn't this exactly 19.2MHz?
> >
> > It's based off the timestamp base (from documentation) :
> >
> > BDW: 80ns
> > SKL: 83.333ns
> > BXT: 52.083ns
> >
> > 1000000000 / 19200123 is the closest we can get.
>
> I'm pretty sure the clock is actually 19.2 and the 52.083 is just a
> truncated value. Same for the 83.333 where the code does specify
> 120MHz exactly.
>

Ah, right, that sounds most likely. I'll update.

Thanks,
- Robert


>
> >
> > >
> > >>                            dev_priv->perf.oa.n_builtin_sets =
> > >>                                    i915_oa_n_builtin_metric_sets_bxt;
> > >>                            dev_priv->perf.oa.ops.select_metric_set =
> > >> --
> > >> 2.12.0
> > >>
> > >> _______________________________________________
> > >> Intel-gfx mailing list
> > >> Intel-gfx@lists.freedesktop.org
> > >> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
> >
>
> --
> Ville Syrjälä
> Intel OTC
>
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 3a22b6fd0ee6..48b07d706f06 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2463,6 +2463,7 @@  struct drm_i915_private {
 
 			bool periodic;
 			int period_exponent;
+			int timestamp_frequency;
 
 			int metrics_set;
 
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 98eb6415b63a..87c0d1ce1b9f 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -2549,6 +2549,12 @@  i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv,
 	return ret;
 }
 
+static u64 oa_exponent_to_ns(struct drm_i915_private *dev_priv, int exponent)
+{
+       return div_u64(1000000000ULL * (2ULL << exponent),
+                      dev_priv->perf.oa.timestamp_frequency);
+}
+
 /**
  * read_properties_unlocked - validate + copy userspace stream open properties
  * @dev_priv: i915 device instance
@@ -2647,14 +2653,9 @@  static int read_properties_unlocked(struct drm_i915_private *dev_priv,
 			/* Theoretically we can program the OA unit to sample
 			 * every 160ns but don't allow that by default unless
 			 * root.
-			 *
-			 * On Haswell the period is derived from the exponent
-			 * as:
-			 *
-			 *   period = 80ns * 2^(exponent + 1)
 			 */
 			BUILD_BUG_ON(sizeof(oa_period) != 8);
-			oa_period = 80ull * (2ull << value);
+			oa_period = oa_exponent_to_ns(dev_priv, value);
 
 			/* This check is primarily to ensure that oa_period <=
 			 * UINT32_MAX (before passing to do_div which only
@@ -2910,6 +2911,8 @@  void i915_perf_init(struct drm_i915_private *dev_priv)
 		dev_priv->perf.oa.ops.oa_hw_tail_read =
 			gen7_oa_hw_tail_read;
 
+		dev_priv->perf.oa.timestamp_frequency = 12500000;
+
 		dev_priv->perf.oa.oa_formats = hsw_oa_formats;
 
 		dev_priv->perf.oa.n_builtin_sets =
@@ -2923,6 +2926,8 @@  void i915_perf_init(struct drm_i915_private *dev_priv)
 		 */
 
 		if (IS_GEN8(dev_priv)) {
+			dev_priv->perf.oa.timestamp_frequency = 12500000;
+
 			dev_priv->perf.oa.ctx_oactxctrl_offset = 0x120;
 			dev_priv->perf.oa.ctx_flexeu0_offset = 0x2ce;
 			dev_priv->perf.oa.gen8_valid_ctx_bit = (1<<25);
@@ -2939,6 +2944,8 @@  void i915_perf_init(struct drm_i915_private *dev_priv)
 					i915_oa_select_metric_set_chv;
 			}
 		} else if (IS_GEN9(dev_priv)) {
+			dev_priv->perf.oa.timestamp_frequency = 12000000;
+
 			dev_priv->perf.oa.ctx_oactxctrl_offset = 0x128;
 			dev_priv->perf.oa.ctx_flexeu0_offset = 0x3de;
 			dev_priv->perf.oa.gen8_valid_ctx_bit = (1<<16);
@@ -2959,6 +2966,8 @@  void i915_perf_init(struct drm_i915_private *dev_priv)
 				dev_priv->perf.oa.ops.select_metric_set =
 					i915_oa_select_metric_set_sklgt4;
 			} else if (IS_BROXTON(dev_priv)) {
+				dev_priv->perf.oa.timestamp_frequency = 19200123;
+
 				dev_priv->perf.oa.n_builtin_sets =
 					i915_oa_n_builtin_metric_sets_bxt;
 				dev_priv->perf.oa.ops.select_metric_set =