diff mbox

[v2] drm/i915/perf: per-gen timebase for checking sample freq

Message ID 20170405190506.2227-1-robert@sixbynine.org (mailing list archive)
State New, archived
Headers show

Commit Message

Robert Bragg April 5, 2017, 7:05 p.m. UTC
An oa_exponent_to_ns() utility and per-gen timebase constants were
recently removed when updating the tail pointer race condition WA, and
this restores those so we can update the _PROP_OA_EXPONENT validation
done in read_properties_unlocked() to not assume we have a 12.5MHz
timebase as we did for Haswell.

Accordingly the oa_sample_rate_hard_limit value that's referenced by
proc_dointvec_minmax defining the absolute limit for the OA sampling
frequency is now initialized to (timestamp_frequency / 2) instead of the
6.25MHz constant for Haswell.

v2:
    Specify frequency of 19.2MHz for BXT (Ville)
    Initialize oa_sample_rate_hard_limit per-gen too (Lionel)

Signed-off-by: Robert Bragg <robert@sixbynine.org>
Cc: Lionel Landwerlin <lionel.g.landwerlin@linux.intel.com>
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h  |  1 +
 drivers/gpu/drm/i915/i915_perf.c | 31 ++++++++++++++++++++++---------
 2 files changed, 23 insertions(+), 9 deletions(-)

Comments

Matthew Auld April 12, 2017, 12:34 p.m. UTC | #1
On 5 April 2017 at 20:05, Robert Bragg <robert@sixbynine.org> wrote:
> An oa_exponent_to_ns() utility and per-gen timebase constants where
were

> recently removed when updating the tail pointer race condition WA, and
> this restores those so we can update the _PROP_OA_EXPONENT validation
> done in read_properties_unlocked() to not assume we have a 12.5MHz
> timebase as we did for Haswell.
>
> Accordingly the oa_sample_rate_hard_limit value that's referenced by
> proc_dointvec_minmax defining the absolute limit for the OA sampling
> frequency is now initialized to (timestamp_frequency / 2) instead of the
> 6.25MHz constant for Haswell.
>
> v2:
>     Specify frequency of 19.2MHz for BXT (Ville)
>     Initialize oa_sample_rate_hard_limit per-gen too (Lionel)
>
> Signed-off-by: Robert Bragg <robert@sixbynine.org>
> Cc: Lionel Landwerlin <lionel.g.landwerlin@linux.intel.com>
> Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/i915_drv.h  |  1 +
>  drivers/gpu/drm/i915/i915_perf.c | 31 ++++++++++++++++++++++---------
>  2 files changed, 23 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 3a22b6fd0ee6..48b07d706f06 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2463,6 +2463,7 @@ struct drm_i915_private {
>
>                         bool periodic;
>                         int period_exponent;
> +                       int timestamp_frequency;
>
>                         int metrics_set;
>
> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
> index 98eb6415b63a..980b4a1fd7cc 100644
> --- a/drivers/gpu/drm/i915/i915_perf.c
> +++ b/drivers/gpu/drm/i915/i915_perf.c
> @@ -288,10 +288,12 @@ static u32 i915_perf_stream_paranoid = true;
>
>  /* For sysctl proc_dointvec_minmax of i915_oa_max_sample_rate
>   *
> - * 160ns is the smallest sampling period we can theoretically program the OA
> - * unit with on Haswell, corresponding to 6.25MHz.
> + * The highest sampling frequency we can theoretically program the OA unit
> + * with is always half the timestamp frequency: E.g. 6.25Mhz for Haswell.
> + *
> + * Initialized just before we register the sysctl parameter.
>   */
> -static int oa_sample_rate_hard_limit = 6250000;
> +static int oa_sample_rate_hard_limit;
>
>  /* Theoretically we can program the OA unit to sample every 160ns but don't
>   * allow that by default unless root...
> @@ -2549,6 +2551,12 @@ i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv,
>         return ret;
>  }
>
> +static u64 oa_exponent_to_ns(struct drm_i915_private *dev_priv, int exponent)
> +{
> +       return div_u64(1000000000ULL * (2ULL << exponent),
> +                      dev_priv->perf.oa.timestamp_frequency);
> +}
> +
>  /**
>   * read_properties_unlocked - validate + copy userspace stream open properties
>   * @dev_priv: i915 device instance
> @@ -2647,14 +2655,9 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv,
>                         /* Theoretically we can program the OA unit to sample
>                          * every 160ns but don't allow that by default unless
hmm, that's not actually true if we consider BXT, right?

Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Robert Bragg April 12, 2017, 3:07 p.m. UTC | #2
On Wed, Apr 12, 2017 at 1:34 PM, Matthew Auld <
matthew.william.auld@gmail.com> wrote:

> On 5 April 2017 at 20:05, Robert Bragg <robert@sixbynine.org> wrote:
> > An oa_exponent_to_ns() utility and per-gen timebase constants where
> were
>
> > recently removed when updating the tail pointer race condition WA, and
> > this restores those so we can update the _PROP_OA_EXPONENT validation
> > done in read_properties_unlocked() to not assume we have a 12.5MHz
> > timebase as we did for Haswell.
> >
> > Accordingly the oa_sample_rate_hard_limit value that's referenced by
> > proc_dointvec_minmax defining the absolute limit for the OA sampling
> > frequency is now initialized to (timestamp_frequency / 2) instead of the
> > 6.25MHz constant for Haswell.
> >
> > v2:
> >     Specify frequency of 19.2MHz for BXT (Ville)
> >     Initialize oa_sample_rate_hard_limit per-gen too (Lionel)
> >
> > Signed-off-by: Robert Bragg <robert@sixbynine.org>
> > Cc: Lionel Landwerlin <lionel.g.landwerlin@linux.intel.com>
> > Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
> > ---
> >  drivers/gpu/drm/i915/i915_drv.h  |  1 +
> >  drivers/gpu/drm/i915/i915_perf.c | 31 ++++++++++++++++++++++---------
> >  2 files changed, 23 insertions(+), 9 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/i915_drv.h
> b/drivers/gpu/drm/i915/i915_drv.h
> > index 3a22b6fd0ee6..48b07d706f06 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.h
> > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > @@ -2463,6 +2463,7 @@ struct drm_i915_private {
> >
> >                         bool periodic;
> >                         int period_exponent;
> > +                       int timestamp_frequency;
> >
> >                         int metrics_set;
> >
> > diff --git a/drivers/gpu/drm/i915/i915_perf.c
> b/drivers/gpu/drm/i915/i915_perf.c
> > index 98eb6415b63a..980b4a1fd7cc 100644
> > --- a/drivers/gpu/drm/i915/i915_perf.c
> > +++ b/drivers/gpu/drm/i915/i915_perf.c
> > @@ -288,10 +288,12 @@ static u32 i915_perf_stream_paranoid = true;
> >
> >  /* For sysctl proc_dointvec_minmax of i915_oa_max_sample_rate
> >   *
> > - * 160ns is the smallest sampling period we can theoretically program
> the OA
> > - * unit with on Haswell, corresponding to 6.25MHz.
> > + * The highest sampling frequency we can theoretically program the OA
> unit
> > + * with is always half the timestamp frequency: E.g. 6.25Mhz for
> Haswell.
> > + *
> > + * Initialized just before we register the sysctl parameter.
> >   */
> > -static int oa_sample_rate_hard_limit = 6250000;
> > +static int oa_sample_rate_hard_limit;
> >
> >  /* Theoretically we can program the OA unit to sample every 160ns but
> don't
> >   * allow that by default unless root...
> > @@ -2549,6 +2551,12 @@ i915_perf_open_ioctl_locked(struct
> drm_i915_private *dev_priv,
> >         return ret;
> >  }
> >
> > +static u64 oa_exponent_to_ns(struct drm_i915_private *dev_priv, int
> exponent)
> > +{
> > +       return div_u64(1000000000ULL * (2ULL << exponent),
> > +                      dev_priv->perf.oa.timestamp_frequency);
> > +}
> > +
> >  /**
> >   * read_properties_unlocked - validate + copy userspace stream open
> properties
> >   * @dev_priv: i915 device instance
> > @@ -2647,14 +2655,9 @@ static int read_properties_unlocked(struct
> drm_i915_private *dev_priv,
> >                         /* Theoretically we can program the OA unit to
> sample
> >                          * every 160ns but don't allow that by default
> unless
> hmm, that's not actually true if we consider BXT, right?
>

right, I've updated this comment now.



>
> Reviewed-by: Matthew Auld <matthew.auld@intel.com>
>

thanks
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 3a22b6fd0ee6..48b07d706f06 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2463,6 +2463,7 @@  struct drm_i915_private {
 
 			bool periodic;
 			int period_exponent;
+			int timestamp_frequency;
 
 			int metrics_set;
 
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 98eb6415b63a..980b4a1fd7cc 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -288,10 +288,12 @@  static u32 i915_perf_stream_paranoid = true;
 
 /* For sysctl proc_dointvec_minmax of i915_oa_max_sample_rate
  *
- * 160ns is the smallest sampling period we can theoretically program the OA
- * unit with on Haswell, corresponding to 6.25MHz.
+ * The highest sampling frequency we can theoretically program the OA unit
+ * with is always half the timestamp frequency: E.g. 6.25Mhz for Haswell.
+ *
+ * Initialized just before we register the sysctl parameter.
  */
-static int oa_sample_rate_hard_limit = 6250000;
+static int oa_sample_rate_hard_limit;
 
 /* Theoretically we can program the OA unit to sample every 160ns but don't
  * allow that by default unless root...
@@ -2549,6 +2551,12 @@  i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv,
 	return ret;
 }
 
+static u64 oa_exponent_to_ns(struct drm_i915_private *dev_priv, int exponent)
+{
+       return div_u64(1000000000ULL * (2ULL << exponent),
+                      dev_priv->perf.oa.timestamp_frequency);
+}
+
 /**
  * read_properties_unlocked - validate + copy userspace stream open properties
  * @dev_priv: i915 device instance
@@ -2647,14 +2655,9 @@  static int read_properties_unlocked(struct drm_i915_private *dev_priv,
 			/* Theoretically we can program the OA unit to sample
 			 * every 160ns but don't allow that by default unless
 			 * root.
-			 *
-			 * On Haswell the period is derived from the exponent
-			 * as:
-			 *
-			 *   period = 80ns * 2^(exponent + 1)
 			 */
 			BUILD_BUG_ON(sizeof(oa_period) != 8);
-			oa_period = 80ull * (2ull << value);
+			oa_period = oa_exponent_to_ns(dev_priv, value);
 
 			/* This check is primarily to ensure that oa_period <=
 			 * UINT32_MAX (before passing to do_div which only
@@ -2910,6 +2913,8 @@  void i915_perf_init(struct drm_i915_private *dev_priv)
 		dev_priv->perf.oa.ops.oa_hw_tail_read =
 			gen7_oa_hw_tail_read;
 
+		dev_priv->perf.oa.timestamp_frequency = 12500000;
+
 		dev_priv->perf.oa.oa_formats = hsw_oa_formats;
 
 		dev_priv->perf.oa.n_builtin_sets =
@@ -2923,6 +2928,8 @@  void i915_perf_init(struct drm_i915_private *dev_priv)
 		 */
 
 		if (IS_GEN8(dev_priv)) {
+			dev_priv->perf.oa.timestamp_frequency = 12500000;
+
 			dev_priv->perf.oa.ctx_oactxctrl_offset = 0x120;
 			dev_priv->perf.oa.ctx_flexeu0_offset = 0x2ce;
 			dev_priv->perf.oa.gen8_valid_ctx_bit = (1<<25);
@@ -2939,6 +2946,8 @@  void i915_perf_init(struct drm_i915_private *dev_priv)
 					i915_oa_select_metric_set_chv;
 			}
 		} else if (IS_GEN9(dev_priv)) {
+			dev_priv->perf.oa.timestamp_frequency = 12000000;
+
 			dev_priv->perf.oa.ctx_oactxctrl_offset = 0x128;
 			dev_priv->perf.oa.ctx_flexeu0_offset = 0x3de;
 			dev_priv->perf.oa.gen8_valid_ctx_bit = (1<<16);
@@ -2959,6 +2968,8 @@  void i915_perf_init(struct drm_i915_private *dev_priv)
 				dev_priv->perf.oa.ops.select_metric_set =
 					i915_oa_select_metric_set_sklgt4;
 			} else if (IS_BROXTON(dev_priv)) {
+				dev_priv->perf.oa.timestamp_frequency = 19200000;
+
 				dev_priv->perf.oa.n_builtin_sets =
 					i915_oa_n_builtin_metric_sets_bxt;
 				dev_priv->perf.oa.ops.select_metric_set =
@@ -2993,6 +3004,8 @@  void i915_perf_init(struct drm_i915_private *dev_priv)
 		spin_lock_init(&dev_priv->perf.hook_lock);
 		spin_lock_init(&dev_priv->perf.oa.oa_buffer.ptr_lock);
 
+		oa_sample_rate_hard_limit =
+			dev_priv->perf.oa.timestamp_frequency / 2;
 		dev_priv->perf.sysctl_header = register_sysctl_table(dev_root);
 
 		dev_priv->perf.initialized = true;