diff mbox

[09/11] drm/i915/perf: allowing opening the perf stream without sampling

Message ID 20180326090831.22686-10-lionel.g.landwerlin@intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Lionel Landwerlin March 26, 2018, 9:08 a.m. UTC
We want to allow a user to configure the OA hardware so that
MI_RECORD_PERF_COUNT is functional but without having to deal with the
OA buffer.

This is an interesting optimization we can apply on Gen7.5 has the OA
unit perfectly clocks off if you've configured it to filter on a
particular context id. In this case, the userspace (like the
INTEL_performance_query extension in Mesa) can use
MI_RECORD_PERF_COUNT without having to read the OA buffer.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
---
 drivers/gpu/drm/i915/i915_perf.c | 56 ++++++++++++++++++++++++++++++----------
 1 file changed, 42 insertions(+), 14 deletions(-)

Comments

Matthew Auld March 26, 2018, 8:16 p.m. UTC | #1
On 26 March 2018 at 10:08, Lionel Landwerlin
<lionel.g.landwerlin@intel.com> wrote:
> We want to allow a user to configure the OA hardware so that
> MI_RECORD_PERF_COUNT is functional but without having to deal with the
> OA buffer.
>
> This is an interesting optimization we can apply on Gen7.5 has the OA
> unit perfectly clocks off if you've configured it to filter on a
> particular context id. In this case, the userspace (like the
> INTEL_performance_query extension in Mesa) can use
> MI_RECORD_PERF_COUNT without having to read the OA buffer.
>
> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_perf.c | 56 ++++++++++++++++++++++++++++++----------
>  1 file changed, 42 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
> index c91ad447207e..55c7631ad3c2 100644
> --- a/drivers/gpu/drm/i915/i915_perf.c
> +++ b/drivers/gpu/drm/i915/i915_perf.c
> @@ -421,6 +421,14 @@ static u32 gen7_oa_hw_tail_read(struct drm_i915_private *dev_priv)
>         return oastatus1 & GEN7_OASTATUS1_TAIL_MASK;
>  }
>
> +static bool i915_perf_stream_produces_reports(struct i915_perf_stream *stream)

Maybe just oa_stream_produces_reports ?

> +{
> +       struct drm_i915_private *dev_priv = stream->dev_priv;
> +
> +       return (stream->sample_flags & SAMPLE_OA_REPORT) != 0 &&
> +               dev_priv->perf.oa.periodic;

stream->dev_priv->perf.oa.periodic; then we can drop dev_priv.

> +}
> +
>  /**
>   * oa_buffer_check_unlocked - check for data and update tail ptr state
>   * @dev_priv: i915 device instance
> @@ -453,6 +461,8 @@ static bool oa_buffer_check_unlocked(struct drm_i915_private *dev_priv)
>         u32 head, hw_tail, aged_tail, aging_tail;
>         u64 now;
>
> +       GEM_BUG_ON(!i915_perf_stream_produces_reports(dev_priv->perf.oa.exclusive_stream));
> +
>         /* We have to consider the (unlikely) possibility that read() errors
>          * could result in an OA buffer reset which might reset the head,
>          * tails[] and aged_tail state.
> @@ -1153,8 +1163,7 @@ static int i915_oa_wait_unlocked(struct i915_perf_stream *stream)
>  {
>         struct drm_i915_private *dev_priv = stream->dev_priv;
>
> -       /* We would wait indefinitely if periodic sampling is not enabled */
> -       if (!dev_priv->perf.oa.periodic)
> +       if (!i915_perf_stream_produces_reports(stream))
>                 return -EIO;
>
>         return wait_event_interruptible(dev_priv->perf.oa.poll_wq,
> @@ -1821,6 +1830,7 @@ static int gen8_enable_metric_set(struct drm_i915_private *dev_priv,
>                                   const struct i915_perf_stream *stream)
>  {
>         struct i915_oa_config *oa_config = stream->oa_config;
> +       u32 oa_debug_value = 0;
>         int ret;
>
>         /*
> @@ -1847,11 +1857,23 @@ static int gen8_enable_metric_set(struct drm_i915_private *dev_priv,
>          * RPT_ID field.
>          */
>         if (IS_GEN9(dev_priv) || IS_GEN10(dev_priv)) {
> -               I915_WRITE(GEN8_OA_DEBUG,
> -                          _MASKED_BIT_ENABLE(GEN9_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS |
> -                                             GEN9_OA_DEBUG_INCLUDE_CLK_RATIO));
> +               oa_debug_value |=
> +                       _MASKED_BIT_ENABLE(GEN9_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS |
> +                                          GEN9_OA_DEBUG_INCLUDE_CLK_RATIO);
>         }
>
> +       /*
> +        * If the user didn't require OA reports, instruct the hardware not to
> +        * emit ctx switch reports (mind your head, we might be disabling the
> +        * disable).
> +        */
> +       oa_debug_value |=
> +               (stream->sample_flags & SAMPLE_OA_REPORT) ?
> +               _MASKED_BIT_DISABLE(GEN9_OA_DEBUG_DISABLE_CTX_SWITCH_REPORTS) :
> +               _MASKED_BIT_ENABLE(GEN9_OA_DEBUG_DISABLE_CTX_SWITCH_REPORTS);
> +
> +       I915_WRITE(GEN8_OA_DEBUG, oa_debug_value);
> +
>         /*
>          * Update all contexts prior writing the mux configurations as we need
>          * to make sure all slices/subslices are ON before writing to NOA
> @@ -1960,7 +1982,7 @@ static void i915_oa_stream_enable(struct i915_perf_stream *stream)
>
>         dev_priv->perf.oa.ops.oa_enable(dev_priv, stream);
>
> -       if (dev_priv->perf.oa.periodic)
> +       if (i915_perf_stream_produces_reports(stream))
>                 hrtimer_start(&dev_priv->perf.oa.poll_check_timer,
>                               ns_to_ktime(POLL_PERIOD),
>                               HRTIMER_MODE_REL_PINNED);
> @@ -1990,7 +2012,7 @@ static void i915_oa_stream_disable(struct i915_perf_stream *stream)
>
>         dev_priv->perf.oa.ops.oa_disable(dev_priv);
>
> -       if (dev_priv->perf.oa.periodic)
> +       if (i915_perf_stream_produces_reports(stream))
>                 hrtimer_cancel(&dev_priv->perf.oa.poll_check_timer);
>  }
>
> @@ -2038,11 +2060,6 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
>                 return -EINVAL;
>         }
>
> -       if (!(props->sample_flags & SAMPLE_OA_REPORT)) {
> -               DRM_DEBUG("Only OA report sampling supported\n");
> -               return -EINVAL;
> -       }
> -
>         if (!dev_priv->perf.oa.ops.init_oa_buffer) {
>                 DRM_DEBUG("OA unit not supported\n");
>                 return -ENODEV;
> @@ -2086,8 +2103,10 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
>
>         format_size = dev_priv->perf.oa.oa_formats[props->oa_format].size;
>
> -       stream->sample_flags |= SAMPLE_OA_REPORT;
> -       stream->sample_size += format_size;
> +       if (props->sample_flags & SAMPLE_OA_REPORT) {
> +               stream->sample_flags |= SAMPLE_OA_REPORT;
> +               stream->sample_size += format_size;
> +       }
>
>         dev_priv->perf.oa.oa_buffer.format_size = format_size;
>         if (WARN_ON(dev_priv->perf.oa.oa_buffer.format_size == 0))
> @@ -2255,6 +2274,12 @@ static ssize_t i915_perf_read(struct file *file,
>         struct drm_i915_private *dev_priv = stream->dev_priv;
>         ssize_t ret;
>
> +       /* If the user opened the stream only to snapshot counters through

Preferred comment style is /*\n

> +        * MI_RECORD_PERF_COUNT, there is nothing to read from the OA buffer.
> +        */
> +       if (!i915_perf_stream_produces_reports(stream))
> +               return -EIO;

I guess reading from a non-periodic stream doesn't make much sense? Do
you know anything about RPT_TO_OABUFFER for MI_RECORD_PERF_COUNT, it
sounds it will instead emit the report to the gtt address that we
configured OABUFFER with? Do you know if that's actually functional,
if so would it not make sense to allow perf_read on non-periodic
streams?

> +
>         /* To ensure it's handled consistently we simply treat all reads of a
>          * disabled stream as an error. In particular it might otherwise lead
>          * to a deadlock for blocking file descriptors...
> @@ -2343,6 +2368,9 @@ static __poll_t i915_perf_poll_locked(struct drm_i915_private *dev_priv,
>  {
>         __poll_t events = 0;
>
> +       if (!i915_perf_stream_produces_reports(stream))
> +               return events;
> +
>         stream->ops->poll_wait(stream, file, wait);
>
>         /* Note: we don't explicitly check whether there's something to read
> --
> 2.16.3
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
diff mbox

Patch

diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index c91ad447207e..55c7631ad3c2 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -421,6 +421,14 @@  static u32 gen7_oa_hw_tail_read(struct drm_i915_private *dev_priv)
 	return oastatus1 & GEN7_OASTATUS1_TAIL_MASK;
 }
 
+static bool i915_perf_stream_produces_reports(struct i915_perf_stream *stream)
+{
+	struct drm_i915_private *dev_priv = stream->dev_priv;
+
+	return (stream->sample_flags & SAMPLE_OA_REPORT) != 0 &&
+		dev_priv->perf.oa.periodic;
+}
+
 /**
  * oa_buffer_check_unlocked - check for data and update tail ptr state
  * @dev_priv: i915 device instance
@@ -453,6 +461,8 @@  static bool oa_buffer_check_unlocked(struct drm_i915_private *dev_priv)
 	u32 head, hw_tail, aged_tail, aging_tail;
 	u64 now;
 
+	GEM_BUG_ON(!i915_perf_stream_produces_reports(dev_priv->perf.oa.exclusive_stream));
+
 	/* We have to consider the (unlikely) possibility that read() errors
 	 * could result in an OA buffer reset which might reset the head,
 	 * tails[] and aged_tail state.
@@ -1153,8 +1163,7 @@  static int i915_oa_wait_unlocked(struct i915_perf_stream *stream)
 {
 	struct drm_i915_private *dev_priv = stream->dev_priv;
 
-	/* We would wait indefinitely if periodic sampling is not enabled */
-	if (!dev_priv->perf.oa.periodic)
+	if (!i915_perf_stream_produces_reports(stream))
 		return -EIO;
 
 	return wait_event_interruptible(dev_priv->perf.oa.poll_wq,
@@ -1821,6 +1830,7 @@  static int gen8_enable_metric_set(struct drm_i915_private *dev_priv,
 				  const struct i915_perf_stream *stream)
 {
 	struct i915_oa_config *oa_config = stream->oa_config;
+	u32 oa_debug_value = 0;
 	int ret;
 
 	/*
@@ -1847,11 +1857,23 @@  static int gen8_enable_metric_set(struct drm_i915_private *dev_priv,
 	 * RPT_ID field.
 	 */
 	if (IS_GEN9(dev_priv) || IS_GEN10(dev_priv)) {
-		I915_WRITE(GEN8_OA_DEBUG,
-			   _MASKED_BIT_ENABLE(GEN9_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS |
-					      GEN9_OA_DEBUG_INCLUDE_CLK_RATIO));
+		oa_debug_value |=
+			_MASKED_BIT_ENABLE(GEN9_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS |
+					   GEN9_OA_DEBUG_INCLUDE_CLK_RATIO);
 	}
 
+	/*
+	 * If the user didn't require OA reports, instruct the hardware not to
+	 * emit ctx switch reports (mind your head, we might be disabling the
+	 * disable).
+	 */
+	oa_debug_value |=
+		(stream->sample_flags & SAMPLE_OA_REPORT) ?
+		_MASKED_BIT_DISABLE(GEN9_OA_DEBUG_DISABLE_CTX_SWITCH_REPORTS) :
+		_MASKED_BIT_ENABLE(GEN9_OA_DEBUG_DISABLE_CTX_SWITCH_REPORTS);
+
+	I915_WRITE(GEN8_OA_DEBUG, oa_debug_value);
+
 	/*
 	 * Update all contexts prior writing the mux configurations as we need
 	 * to make sure all slices/subslices are ON before writing to NOA
@@ -1960,7 +1982,7 @@  static void i915_oa_stream_enable(struct i915_perf_stream *stream)
 
 	dev_priv->perf.oa.ops.oa_enable(dev_priv, stream);
 
-	if (dev_priv->perf.oa.periodic)
+	if (i915_perf_stream_produces_reports(stream))
 		hrtimer_start(&dev_priv->perf.oa.poll_check_timer,
 			      ns_to_ktime(POLL_PERIOD),
 			      HRTIMER_MODE_REL_PINNED);
@@ -1990,7 +2012,7 @@  static void i915_oa_stream_disable(struct i915_perf_stream *stream)
 
 	dev_priv->perf.oa.ops.oa_disable(dev_priv);
 
-	if (dev_priv->perf.oa.periodic)
+	if (i915_perf_stream_produces_reports(stream))
 		hrtimer_cancel(&dev_priv->perf.oa.poll_check_timer);
 }
 
@@ -2038,11 +2060,6 @@  static int i915_oa_stream_init(struct i915_perf_stream *stream,
 		return -EINVAL;
 	}
 
-	if (!(props->sample_flags & SAMPLE_OA_REPORT)) {
-		DRM_DEBUG("Only OA report sampling supported\n");
-		return -EINVAL;
-	}
-
 	if (!dev_priv->perf.oa.ops.init_oa_buffer) {
 		DRM_DEBUG("OA unit not supported\n");
 		return -ENODEV;
@@ -2086,8 +2103,10 @@  static int i915_oa_stream_init(struct i915_perf_stream *stream,
 
 	format_size = dev_priv->perf.oa.oa_formats[props->oa_format].size;
 
-	stream->sample_flags |= SAMPLE_OA_REPORT;
-	stream->sample_size += format_size;
+	if (props->sample_flags & SAMPLE_OA_REPORT) {
+		stream->sample_flags |= SAMPLE_OA_REPORT;
+		stream->sample_size += format_size;
+	}
 
 	dev_priv->perf.oa.oa_buffer.format_size = format_size;
 	if (WARN_ON(dev_priv->perf.oa.oa_buffer.format_size == 0))
@@ -2255,6 +2274,12 @@  static ssize_t i915_perf_read(struct file *file,
 	struct drm_i915_private *dev_priv = stream->dev_priv;
 	ssize_t ret;
 
+	/* If the user opened the stream only to snapshot counters through
+	 * MI_RECORD_PERF_COUNT, there is nothing to read from the OA buffer.
+	 */
+	if (!i915_perf_stream_produces_reports(stream))
+		return -EIO;
+
 	/* To ensure it's handled consistently we simply treat all reads of a
 	 * disabled stream as an error. In particular it might otherwise lead
 	 * to a deadlock for blocking file descriptors...
@@ -2343,6 +2368,9 @@  static __poll_t i915_perf_poll_locked(struct drm_i915_private *dev_priv,
 {
 	__poll_t events = 0;
 
+	if (!i915_perf_stream_produces_reports(stream))
+		return events;
+
 	stream->ops->poll_wait(stream, file, wait);
 
 	/* Note: we don't explicitly check whether there's something to read