diff mbox series

drm/i915/perf: Configure OAR controls for specific context

Message ID 20191107233433.6928-1-umesh.nerlige.ramappa@intel.com (mailing list archive)
State New, archived
Headers show
Series drm/i915/perf: Configure OAR controls for specific context | expand

Commit Message

Umesh Nerlige Ramappa Nov. 7, 2019, 11:34 p.m. UTC
It turns out that the OAR CONTROL register is not getting configured
correctly in conjunction with the context save/restore bit. When
measuring work for a single context, the OAR counters do not increment.

- Configure OAR format and enable OAR counters at the same time as
  enabling context save/restore for OAR counters.
- Make SAMPLE_OA_REPORT optional from gen12.

v2: Update commit message

Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
---
 drivers/gpu/drm/i915/i915_perf.c | 23 +++++++++++++++--------
 1 file changed, 15 insertions(+), 8 deletions(-)

Comments

Lionel Landwerlin Nov. 8, 2019, 7:22 a.m. UTC | #1
On 08/11/2019 01:34, Umesh Nerlige Ramappa wrote:
> It turns out that the OAR CONTROL register is not getting configured
> correctly in conjunction with the context save/restore bit. When
> measuring work for a single context, the OAR counters do not increment.
>
> - Configure OAR format and enable OAR counters at the same time as
>    enabling context save/restore for OAR counters.
> - Make SAMPLE_OA_REPORT optional from gen12.
>
> v2: Update commit message
>
> Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
> ---
>   drivers/gpu/drm/i915/i915_perf.c | 23 +++++++++++++++--------
>   1 file changed, 15 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
> index 2c380aba1ce9..527a16637689 100644
> --- a/drivers/gpu/drm/i915/i915_perf.c
> +++ b/drivers/gpu/drm/i915/i915_perf.c
> @@ -2219,26 +2219,33 @@ static int gen8_configure_context(struct i915_gem_context *ctx,
>   	return err;
>   }
>   
> -static int gen12_emit_oar_config(struct intel_context *ce, bool enable)
> +static int gen12_emit_oar_config(struct i915_perf_stream *stream, bool enable)
>   {
>   	struct i915_request *rq;
> +	struct intel_context *ce = stream->pinned_ctx;
>   	u32 *cs;
> +	u32 format = stream->oa_buffer.format;
>   	int err = 0;
>   
>   	rq = i915_request_create(ce);
>   	if (IS_ERR(rq))
>   		return PTR_ERR(rq);
>   
> -	cs = intel_ring_begin(rq, 4);
> +	cs = intel_ring_begin(rq, 6);
>   	if (IS_ERR(cs)) {
>   		err = PTR_ERR(cs);
>   		goto out;
>   	}
>   
> -	*cs++ = MI_LOAD_REGISTER_IMM(1);
> +	*cs++ = MI_LOAD_REGISTER_IMM(2);
> +	/* Enable context save/restore of OAR counters */
>   	*cs++ = i915_mmio_reg_offset(RING_CONTEXT_CONTROL(ce->engine->mmio_base));
>   	*cs++ = _MASKED_FIELD(GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE,
>   			      enable ? GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE : 0);
> +	/* Enable OAR counters */
> +	*cs++ = i915_mmio_reg_offset(GEN12_OAR_OACONTROL);
> +	*cs++ = (format << GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT) |
> +		(enable ? GEN12_OAR_OACONTROL_COUNTER_ENABLE : 0);
>   	*cs++ = MI_NOOP;


It's probably a good idea to configure OAR in this function indeed :)


But we're already supposed to program it through context image 
modification in lrc_configure_all_contexts().

So it probably means we have the wrong offset?


We should probably remove it from lrc_configure_all_contexts() then. 
It's probably trashing some other bit of the context image.



>   
>   	intel_ring_advance(rq, cs);
> @@ -2474,8 +2481,7 @@ static int gen12_enable_metric_set(struct i915_perf_stream *stream)
>   	 * requested this.
>   	 */
>   	if (stream->ctx) {
> -		ret = gen12_emit_oar_config(stream->pinned_ctx,
> -					    oa_config != NULL);
> +		ret = gen12_emit_oar_config(stream, oa_config != NULL);
>   		if (ret)
>   			return ret;
>   	}
> @@ -2513,7 +2519,7 @@ static void gen12_disable_metric_set(struct i915_perf_stream *stream)
>   
>   	/* disable the context save/restore or OAR counters */
>   	if (stream->ctx)
> -		gen12_emit_oar_config(stream->pinned_ctx, false);
> +		gen12_emit_oar_config(stream, false);
>   
>   	/* Make sure we disable noa to save power. */
>   	intel_uncore_rmw(uncore, RPM_CONFIG1, GEN10_GT_NOA_ENABLE, 0);
> @@ -2713,7 +2719,8 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
>   		return -EINVAL;
>   	}
>   
> -	if (!(props->sample_flags & SAMPLE_OA_REPORT)) {
> +	if (!(props->sample_flags & SAMPLE_OA_REPORT) &&
> +	    (INTEL_GEN(perf->i915) < 12 || !stream->ctx)) {


Good point, but this should probably go into another patch.

Note that we could also consider not sampling the OA buffer a non 
privileged operation on Gen12+, since the counters are per context 
saved/restored.


Thanks,


-Lionel

>   		DRM_DEBUG("Only OA report sampling supported\n");
>   		return -EINVAL;
>   	}
> @@ -2745,7 +2752,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
>   
>   	format_size = perf->oa_formats[props->oa_format].size;
>   
> -	stream->sample_flags |= SAMPLE_OA_REPORT;
> +	stream->sample_flags = props->sample_flags;
>   	stream->sample_size += format_size;
>   
>   	stream->oa_buffer.format_size = format_size;
Umesh Nerlige Ramappa Nov. 10, 2019, 5:14 p.m. UTC | #2
On Fri, Nov 08, 2019 at 09:22:00AM +0200, Lionel Landwerlin wrote:
>On 08/11/2019 01:34, Umesh Nerlige Ramappa wrote:
>>It turns out that the OAR CONTROL register is not getting configured
>>correctly in conjunction with the context save/restore bit. When
>>measuring work for a single context, the OAR counters do not increment.
>>
>>- Configure OAR format and enable OAR counters at the same time as
>>   enabling context save/restore for OAR counters.
>>- Make SAMPLE_OA_REPORT optional from gen12.
>>
>>v2: Update commit message
>>
>>Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
>>---
>>  drivers/gpu/drm/i915/i915_perf.c | 23 +++++++++++++++--------
>>  1 file changed, 15 insertions(+), 8 deletions(-)
>>
>>diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
>>index 2c380aba1ce9..527a16637689 100644
>>--- a/drivers/gpu/drm/i915/i915_perf.c
>>+++ b/drivers/gpu/drm/i915/i915_perf.c
>>@@ -2219,26 +2219,33 @@ static int gen8_configure_context(struct i915_gem_context *ctx,
>>  	return err;
>>  }
>>-static int gen12_emit_oar_config(struct intel_context *ce, bool enable)
>>+static int gen12_emit_oar_config(struct i915_perf_stream *stream, bool enable)
>>  {
>>  	struct i915_request *rq;
>>+	struct intel_context *ce = stream->pinned_ctx;
>>  	u32 *cs;
>>+	u32 format = stream->oa_buffer.format;
>>  	int err = 0;
>>  	rq = i915_request_create(ce);
>>  	if (IS_ERR(rq))
>>  		return PTR_ERR(rq);
>>-	cs = intel_ring_begin(rq, 4);
>>+	cs = intel_ring_begin(rq, 6);
>>  	if (IS_ERR(cs)) {
>>  		err = PTR_ERR(cs);
>>  		goto out;
>>  	}
>>-	*cs++ = MI_LOAD_REGISTER_IMM(1);
>>+	*cs++ = MI_LOAD_REGISTER_IMM(2);
>>+	/* Enable context save/restore of OAR counters */
>>  	*cs++ = i915_mmio_reg_offset(RING_CONTEXT_CONTROL(ce->engine->mmio_base));
>>  	*cs++ = _MASKED_FIELD(GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE,
>>  			      enable ? GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE : 0);
>>+	/* Enable OAR counters */
>>+	*cs++ = i915_mmio_reg_offset(GEN12_OAR_OACONTROL);
>>+	*cs++ = (format << GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT) |
>>+		(enable ? GEN12_OAR_OACONTROL_COUNTER_ENABLE : 0);
>>  	*cs++ = MI_NOOP;
>
>
>It's probably a good idea to configure OAR in this function indeed :)
>
>
>But we're already supposed to program it through context image 
>modification in lrc_configure_all_contexts().
>
>So it probably means we have the wrong offset?
>
>
>We should probably remove it from lrc_configure_all_contexts() then. 
>It's probably trashing some other bit of the context image.
>

Offset is correct.

When I removed the configuration from lrc_configure_all_contexts(), the 
test broke :( 

Debugging further, it looks like OAR config (OAR_OACONTROL and 
RING_CONTEXT_CONTROL) must be applied to pinned ctx image as well as 
with LRI using kernel_context

For gen12, lrc_configure_all_contexts only needs to configure the stable 
power state.

Posting rev2, verified with test as posted here - 
https://patchwork.freedesktop.org/patch/339514/?series=69164&rev=1 

The expectation is that oar counters are saved and restored only for the 
context passed in perf open ioctl. Also, if some other context issues 
MI_RPC, it should get valid timestamps, context id etc.., but zeroed 
counter reports. Let me know if this is not the right understanding. 

>
>
>>  	intel_ring_advance(rq, cs);
>>@@ -2474,8 +2481,7 @@ static int gen12_enable_metric_set(struct i915_perf_stream *stream)
>>  	 * requested this.
>>  	 */
>>  	if (stream->ctx) {
>>-		ret = gen12_emit_oar_config(stream->pinned_ctx,
>>-					    oa_config != NULL);
>>+		ret = gen12_emit_oar_config(stream, oa_config != NULL);
>>  		if (ret)
>>  			return ret;
>>  	}
>>@@ -2513,7 +2519,7 @@ static void gen12_disable_metric_set(struct i915_perf_stream *stream)
>>  	/* disable the context save/restore or OAR counters */
>>  	if (stream->ctx)
>>-		gen12_emit_oar_config(stream->pinned_ctx, false);
>>+		gen12_emit_oar_config(stream, false);
>>  	/* Make sure we disable noa to save power. */
>>  	intel_uncore_rmw(uncore, RPM_CONFIG1, GEN10_GT_NOA_ENABLE, 0);
>>@@ -2713,7 +2719,8 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
>>  		return -EINVAL;
>>  	}
>>-	if (!(props->sample_flags & SAMPLE_OA_REPORT)) {
>>+	if (!(props->sample_flags & SAMPLE_OA_REPORT) &&
>>+	    (INTEL_GEN(perf->i915) < 12 || !stream->ctx)) {
>
>
>Good point, but this should probably go into another patch.
>
>Note that we could also consider not sampling the OA buffer a non 
>privileged operation on Gen12+, since the counters are per context 
>saved/restored.
>

The TGL support patch already makes 'not sampling the OA buffer' 
non-privileged. These changes are only clearing the path. I have moved 
them to a separate patch in v2.

Thanks,
Umesh

>
>Thanks,
>
>
>-Lionel
>
>>  		DRM_DEBUG("Only OA report sampling supported\n");
>>  		return -EINVAL;
>>  	}
>>@@ -2745,7 +2752,7 @@ static int i915_oa_stream_init(struct i915_perf_stream *stream,
>>  	format_size = perf->oa_formats[props->oa_format].size;
>>-	stream->sample_flags |= SAMPLE_OA_REPORT;
>>+	stream->sample_flags = props->sample_flags;
>>  	stream->sample_size += format_size;
>>  	stream->oa_buffer.format_size = format_size;
>
>
Lionel Landwerlin Nov. 11, 2019, 9:38 a.m. UTC | #3
On 10/11/2019 19:14, Umesh Nerlige Ramappa wrote:
> On Fri, Nov 08, 2019 at 09:22:00AM +0200, Lionel Landwerlin wrote:
>> On 08/11/2019 01:34, Umesh Nerlige Ramappa wrote:
>>> It turns out that the OAR CONTROL register is not getting configured
>>> correctly in conjunction with the context save/restore bit. When
>>> measuring work for a single context, the OAR counters do not increment.
>>>
>>> - Configure OAR format and enable OAR counters at the same time as
>>>   enabling context save/restore for OAR counters.
>>> - Make SAMPLE_OA_REPORT optional from gen12.
>>>
>>> v2: Update commit message
>>>
>>> Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
>>> ---
>>>  drivers/gpu/drm/i915/i915_perf.c | 23 +++++++++++++++--------
>>>  1 file changed, 15 insertions(+), 8 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_perf.c 
>>> b/drivers/gpu/drm/i915/i915_perf.c
>>> index 2c380aba1ce9..527a16637689 100644
>>> --- a/drivers/gpu/drm/i915/i915_perf.c
>>> +++ b/drivers/gpu/drm/i915/i915_perf.c
>>> @@ -2219,26 +2219,33 @@ static int gen8_configure_context(struct 
>>> i915_gem_context *ctx,
>>>      return err;
>>>  }
>>> -static int gen12_emit_oar_config(struct intel_context *ce, bool 
>>> enable)
>>> +static int gen12_emit_oar_config(struct i915_perf_stream *stream, 
>>> bool enable)
>>>  {
>>>      struct i915_request *rq;
>>> +    struct intel_context *ce = stream->pinned_ctx;
>>>      u32 *cs;
>>> +    u32 format = stream->oa_buffer.format;
>>>      int err = 0;
>>>      rq = i915_request_create(ce);
>>>      if (IS_ERR(rq))
>>>          return PTR_ERR(rq);
>>> -    cs = intel_ring_begin(rq, 4);
>>> +    cs = intel_ring_begin(rq, 6);
>>>      if (IS_ERR(cs)) {
>>>          err = PTR_ERR(cs);
>>>          goto out;
>>>      }
>>> -    *cs++ = MI_LOAD_REGISTER_IMM(1);
>>> +    *cs++ = MI_LOAD_REGISTER_IMM(2);
>>> +    /* Enable context save/restore of OAR counters */
>>>      *cs++ = 
>>> i915_mmio_reg_offset(RING_CONTEXT_CONTROL(ce->engine->mmio_base));
>>>      *cs++ = _MASKED_FIELD(GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE,
>>>                    enable ? GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE : 0);
>>> +    /* Enable OAR counters */
>>> +    *cs++ = i915_mmio_reg_offset(GEN12_OAR_OACONTROL);
>>> +    *cs++ = (format << GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT) |
>>> +        (enable ? GEN12_OAR_OACONTROL_COUNTER_ENABLE : 0);
>>>      *cs++ = MI_NOOP;
>>
>>
>> It's probably a good idea to configure OAR in this function indeed :)
>>
>>
>> But we're already supposed to program it through context image 
>> modification in lrc_configure_all_contexts().
>>
>> So it probably means we have the wrong offset?
>>
>>
>> We should probably remove it from lrc_configure_all_contexts() then. 
>> It's probably trashing some other bit of the context image.
>>
>
> Offset is correct.
>
> When I removed the configuration from lrc_configure_all_contexts(), 
> the test broke :(
> Debugging further, it looks like OAR config (OAR_OACONTROL and 
> RING_CONTEXT_CONTROL) must be applied to pinned ctx image as well as 
> with LRI using kernel_context


We must be missing something, because that doesn't make sense. 
OAR_OACONTROL ought to be needed only for pinned_ctx.

If we program it through gen12_emit_oar_config() there is no need to do 
it in lrc_configure_all_contexts() as well.



>
> For gen12, lrc_configure_all_contexts only needs to configure the 
> stable power state.
>
> Posting rev2, verified with test as posted here - 
> https://patchwork.freedesktop.org/patch/339514/?series=69164&rev=1
> The expectation is that oar counters are saved and restored only for 
> the context passed in perf open ioctl. Also, if some other context 
> issues MI_RPC, it should get valid timestamps, context id etc.., but 
> zeroed counter reports. Let me know if this is not the right 
> understanding.


If I remember correctly, the documentation says MI_RPC results are 
undefined if OAR is not enabled.

So I wouldn't even expect timestamps/context-id to be valid.


>>
>>
>>>      intel_ring_advance(rq, cs);
>>> @@ -2474,8 +2481,7 @@ static int gen12_enable_metric_set(struct 
>>> i915_perf_stream *stream)
>>>       * requested this.
>>>       */
>>>      if (stream->ctx) {
>>> -        ret = gen12_emit_oar_config(stream->pinned_ctx,
>>> -                        oa_config != NULL);
>>> +        ret = gen12_emit_oar_config(stream, oa_config != NULL);
>>>          if (ret)
>>>              return ret;
>>>      }
>>> @@ -2513,7 +2519,7 @@ static void gen12_disable_metric_set(struct 
>>> i915_perf_stream *stream)
>>>      /* disable the context save/restore or OAR counters */
>>>      if (stream->ctx)
>>> -        gen12_emit_oar_config(stream->pinned_ctx, false);
>>> +        gen12_emit_oar_config(stream, false);
>>>      /* Make sure we disable noa to save power. */
>>>      intel_uncore_rmw(uncore, RPM_CONFIG1, GEN10_GT_NOA_ENABLE, 0);
>>> @@ -2713,7 +2719,8 @@ static int i915_oa_stream_init(struct 
>>> i915_perf_stream *stream,
>>>          return -EINVAL;
>>>      }
>>> -    if (!(props->sample_flags & SAMPLE_OA_REPORT)) {
>>> +    if (!(props->sample_flags & SAMPLE_OA_REPORT) &&
>>> +        (INTEL_GEN(perf->i915) < 12 || !stream->ctx)) {
>>
>>
>> Good point, but this should probably go into another patch.
>>
>> Note that we could also consider not sampling the OA buffer a non 
>> privileged operation on Gen12+, since the counters are per context 
>> saved/restored.
>>
>
> The TGL support patch already makes 'not sampling the OA buffer' 
> non-privileged. These changes are only clearing the path. I have moved 
> them to a separate patch in v2.


Oh... Thanks I didn't remember that.



>
> Thanks,
> Umesh
>
>>
>> Thanks,
>>
>>
>> -Lionel
>>
>>>          DRM_DEBUG("Only OA report sampling supported\n");
>>>          return -EINVAL;
>>>      }
>>> @@ -2745,7 +2752,7 @@ static int i915_oa_stream_init(struct 
>>> i915_perf_stream *stream,
>>>      format_size = perf->oa_formats[props->oa_format].size;
>>> -    stream->sample_flags |= SAMPLE_OA_REPORT;
>>> +    stream->sample_flags = props->sample_flags;
>>>      stream->sample_size += format_size;
>>>      stream->oa_buffer.format_size = format_size;
>>
>>
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 2c380aba1ce9..527a16637689 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -2219,26 +2219,33 @@  static int gen8_configure_context(struct i915_gem_context *ctx,
 	return err;
 }
 
-static int gen12_emit_oar_config(struct intel_context *ce, bool enable)
+static int gen12_emit_oar_config(struct i915_perf_stream *stream, bool enable)
 {
 	struct i915_request *rq;
+	struct intel_context *ce = stream->pinned_ctx;
 	u32 *cs;
+	u32 format = stream->oa_buffer.format;
 	int err = 0;
 
 	rq = i915_request_create(ce);
 	if (IS_ERR(rq))
 		return PTR_ERR(rq);
 
-	cs = intel_ring_begin(rq, 4);
+	cs = intel_ring_begin(rq, 6);
 	if (IS_ERR(cs)) {
 		err = PTR_ERR(cs);
 		goto out;
 	}
 
-	*cs++ = MI_LOAD_REGISTER_IMM(1);
+	*cs++ = MI_LOAD_REGISTER_IMM(2);
+	/* Enable context save/restore of OAR counters */
 	*cs++ = i915_mmio_reg_offset(RING_CONTEXT_CONTROL(ce->engine->mmio_base));
 	*cs++ = _MASKED_FIELD(GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE,
 			      enable ? GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE : 0);
+	/* Enable OAR counters */
+	*cs++ = i915_mmio_reg_offset(GEN12_OAR_OACONTROL);
+	*cs++ = (format << GEN12_OAR_OACONTROL_COUNTER_FORMAT_SHIFT) |
+		(enable ? GEN12_OAR_OACONTROL_COUNTER_ENABLE : 0);
 	*cs++ = MI_NOOP;
 
 	intel_ring_advance(rq, cs);
@@ -2474,8 +2481,7 @@  static int gen12_enable_metric_set(struct i915_perf_stream *stream)
 	 * requested this.
 	 */
 	if (stream->ctx) {
-		ret = gen12_emit_oar_config(stream->pinned_ctx,
-					    oa_config != NULL);
+		ret = gen12_emit_oar_config(stream, oa_config != NULL);
 		if (ret)
 			return ret;
 	}
@@ -2513,7 +2519,7 @@  static void gen12_disable_metric_set(struct i915_perf_stream *stream)
 
 	/* disable the context save/restore or OAR counters */
 	if (stream->ctx)
-		gen12_emit_oar_config(stream->pinned_ctx, false);
+		gen12_emit_oar_config(stream, false);
 
 	/* Make sure we disable noa to save power. */
 	intel_uncore_rmw(uncore, RPM_CONFIG1, GEN10_GT_NOA_ENABLE, 0);
@@ -2713,7 +2719,8 @@  static int i915_oa_stream_init(struct i915_perf_stream *stream,
 		return -EINVAL;
 	}
 
-	if (!(props->sample_flags & SAMPLE_OA_REPORT)) {
+	if (!(props->sample_flags & SAMPLE_OA_REPORT) &&
+	    (INTEL_GEN(perf->i915) < 12 || !stream->ctx)) {
 		DRM_DEBUG("Only OA report sampling supported\n");
 		return -EINVAL;
 	}
@@ -2745,7 +2752,7 @@  static int i915_oa_stream_init(struct i915_perf_stream *stream,
 
 	format_size = perf->oa_formats[props->oa_format].size;
 
-	stream->sample_flags |= SAMPLE_OA_REPORT;
+	stream->sample_flags = props->sample_flags;
 	stream->sample_size += format_size;
 
 	stream->oa_buffer.format_size = format_size;