[v2,2/3] drm/i915/perf: enable OAR context save/restore of performance counters
diff mbox series

Message ID 20191018005028.47441-2-umesh.nerlige.ramappa@intel.com
State New
Headers show
Series
  • [v2,1/3] drm/i915/perf: Add helper macros for comparing with whitelisted registers
Related show

Commit Message

Umesh Nerlige Ramappa Oct. 18, 2019, 12:50 a.m. UTC
From: Lionel Landwerlin <lionel.g.landwerlin@intel.com>

We want this so we can preempt performance queries and keep the system
responsive even when long running queries are ongoing. We avoid doing
it for all contexts.

v2: use LRI to modify context control (Chris)
v3: use MASKED_FIELD to program just the masked bits (Chris)
v4: reuse request created during emit_oa_config (Lionel)

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_lrc.h |  1 +
 drivers/gpu/drm/i915/i915_perf.c    | 35 ++++++++++++++++++++++++++++-
 2 files changed, 35 insertions(+), 1 deletion(-)

Comments

Lionel Landwerlin Oct. 18, 2019, 7:54 a.m. UTC | #1
On 18/10/2019 03:50, Umesh Nerlige Ramappa wrote:
> From: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
>
> We want this so we can preempt performance queries and keep the system
> responsive even when long running queries are ongoing. We avoid doing
> it for all contexts.
>
> v2: use LRI to modify context control (Chris)
> v3: use MASKED_FIELD to program just the masked bits (Chris)
> v4: reuse request created during emit_oa_config (Lionel)
>
> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
> Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>


Looks good :


Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>


> ---
>   drivers/gpu/drm/i915/gt/intel_lrc.h |  1 +
>   drivers/gpu/drm/i915/i915_perf.c    | 35 ++++++++++++++++++++++++++++-
>   2 files changed, 35 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h
> index 99dc576a4e25..b6daac712c9e 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.h
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.h
> @@ -43,6 +43,7 @@ struct intel_engine_cs;
>   #define	  CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT	(1 << 0)
>   #define   CTX_CTRL_RS_CTX_ENABLE		(1 << 1)
>   #define	  CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT	(1 << 2)
> +#define	  GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE	(1 << 8)
>   #define RING_CONTEXT_STATUS_PTR(base)		_MMIO((base) + 0x3a0)
>   #define RING_EXECLIST_SQ_CONTENTS(base)		_MMIO((base) + 0x510)
>   #define RING_EXECLIST_CONTROL(base)		_MMIO((base) + 0x550)
> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
> index 91707558a0f5..ce97af484a32 100644
> --- a/drivers/gpu/drm/i915/i915_perf.c
> +++ b/drivers/gpu/drm/i915/i915_perf.c
> @@ -1862,14 +1862,36 @@ get_oa_vma(struct i915_perf_stream *stream, struct i915_oa_config *oa_config)
>   	return i915_vma_get(oa_bo->vma);
>   }
>   
> +static int gen12_emit_oar_config(struct i915_request *rq,
> +				 struct intel_context *ce,
> +				 bool enable)
> +{
> +	u32 *cs;
> +
> +	cs = intel_ring_begin(rq, 4);
> +	if (IS_ERR(cs))
> +		return PTR_ERR(cs);
> +
> +	*cs++ = MI_LOAD_REGISTER_IMM(1);
> +	*cs++ = i915_mmio_reg_offset(RING_CONTEXT_CONTROL(ce->engine->mmio_base));
> +	*cs++ = _MASKED_FIELD(GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE,
> +			      enable ? GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE : 0);
> +	*cs++ = MI_NOOP;
> +
> +	intel_ring_advance(rq, cs);
> +
> +	return 0;
> +}
> +
>   static int emit_oa_config(struct i915_perf_stream *stream,
>   			  struct intel_context *ce)
>   {
> +	struct i915_oa_config *oa_config = stream->oa_config;
>   	struct i915_request *rq;
>   	struct i915_vma *vma;
>   	int err;
>   
> -	vma = get_oa_vma(stream, stream->oa_config);
> +	vma = get_oa_vma(stream, oa_config);
>   	if (IS_ERR(vma))
>   		return PTR_ERR(vma);
>   
> @@ -1891,6 +1913,17 @@ static int emit_oa_config(struct i915_perf_stream *stream,
>   	if (err)
>   		goto err_add_request;
>   
> +	/*
> +	 * For Gen12, performance counters are context saved/restored.
> +	 * Only enable it for the context that requested this.
> +	 */
> +	if (ce == stream->pinned_ctx && IS_GEN(stream->perf->i915, 12)) {
> +		err = gen12_emit_oar_config(rq, ce, oa_config != NULL);
> +		if (err)
> +			goto err_add_request;
> +
> +	}
> +
>   	err = rq->engine->emit_bb_start(rq,
>   					vma->node.start, 0,
>   					I915_DISPATCH_SECURE);
Chris Wilson Oct. 18, 2019, 11:22 p.m. UTC | #2
Quoting Umesh Nerlige Ramappa (2019-10-18 01:50:27)
> From: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
> 
> We want this so we can preempt performance queries and keep the system
> responsive even when long running queries are ongoing. We avoid doing
> it for all contexts.
> 
> v2: use LRI to modify context control (Chris)
> v3: use MASKED_FIELD to program just the masked bits (Chris)
> v4: reuse request created during emit_oa_config (Lionel)
> 
> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
> Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
> ---
>  drivers/gpu/drm/i915/gt/intel_lrc.h |  1 +
>  drivers/gpu/drm/i915/i915_perf.c    | 35 ++++++++++++++++++++++++++++-
>  2 files changed, 35 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h
> index 99dc576a4e25..b6daac712c9e 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.h
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.h
> @@ -43,6 +43,7 @@ struct intel_engine_cs;
>  #define          CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT   (1 << 0)
>  #define   CTX_CTRL_RS_CTX_ENABLE               (1 << 1)
>  #define          CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT      (1 << 2)
> +#define          GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE     (1 << 8)
>  #define RING_CONTEXT_STATUS_PTR(base)          _MMIO((base) + 0x3a0)
>  #define RING_EXECLIST_SQ_CONTENTS(base)                _MMIO((base) + 0x510)
>  #define RING_EXECLIST_CONTROL(base)            _MMIO((base) + 0x550)
> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
> index 91707558a0f5..ce97af484a32 100644
> --- a/drivers/gpu/drm/i915/i915_perf.c
> +++ b/drivers/gpu/drm/i915/i915_perf.c
> @@ -1862,14 +1862,36 @@ get_oa_vma(struct i915_perf_stream *stream, struct i915_oa_config *oa_config)
>         return i915_vma_get(oa_bo->vma);
>  }
>  
> +static int gen12_emit_oar_config(struct i915_request *rq,
> +                                struct intel_context *ce,
> +                                bool enable)
> +{
> +       u32 *cs;
> +
> +       cs = intel_ring_begin(rq, 4);
> +       if (IS_ERR(cs))
> +               return PTR_ERR(cs);
> +
> +       *cs++ = MI_LOAD_REGISTER_IMM(1);
> +       *cs++ = i915_mmio_reg_offset(RING_CONTEXT_CONTROL(ce->engine->mmio_base));
> +       *cs++ = _MASKED_FIELD(GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE,
> +                             enable ? GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE : 0);
> +       *cs++ = MI_NOOP;
> +
> +       intel_ring_advance(rq, cs);
> +
> +       return 0;
> +}
> +
>  static int emit_oa_config(struct i915_perf_stream *stream,
>                           struct intel_context *ce)
>  {
> +       struct i915_oa_config *oa_config = stream->oa_config;
>         struct i915_request *rq;
>         struct i915_vma *vma;
>         int err;
>  
> -       vma = get_oa_vma(stream, stream->oa_config);
> +       vma = get_oa_vma(stream, oa_config);
>         if (IS_ERR(vma))
>                 return PTR_ERR(vma);
>  
> @@ -1891,6 +1913,17 @@ static int emit_oa_config(struct i915_perf_stream *stream,
>         if (err)
>                 goto err_add_request;
>  
> +       /*
> +        * For Gen12, performance counters are context saved/restored.
> +        * Only enable it for the context that requested this.
> +        */
> +       if (ce == stream->pinned_ctx && IS_GEN(stream->perf->i915, 12)) {
> +               err = gen12_emit_oar_config(rq, ce, oa_config != NULL);
> +               if (err)
> +                       goto err_add_request;
> +
> +       }

Do we need to do emit_oa_config(stream, NULL) in
gen8_disable_metric_set()?
-Chris
---------------------------------------------------------------------
Intel Corporation (UK) Limited
Registered No. 1134945 (England)
Registered Office: Pipers Way, Swindon SN3 1RJ
VAT No: 860 2173 47

This e-mail and any attachments may contain confidential material for
the sole use of the intended recipient(s). Any review or distribution
by others is strictly prohibited. If you are not the intended
recipient, please contact the sender and delete all copies.
Lionel Landwerlin Oct. 19, 2019, 7:18 a.m. UTC | #3
On 19/10/2019 02:22, Chris Wilson wrote:
> Quoting Umesh Nerlige Ramappa (2019-10-18 01:50:27)
>> From: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
>>
>> We want this so we can preempt performance queries and keep the system
>> responsive even when long running queries are ongoing. We avoid doing
>> it for all contexts.
>>
>> v2: use LRI to modify context control (Chris)
>> v3: use MASKED_FIELD to program just the masked bits (Chris)
>> v4: reuse request created during emit_oa_config (Lionel)
>>
>> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
>> Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
>> ---
>>   drivers/gpu/drm/i915/gt/intel_lrc.h |  1 +
>>   drivers/gpu/drm/i915/i915_perf.c    | 35 ++++++++++++++++++++++++++++-
>>   2 files changed, 35 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h
>> index 99dc576a4e25..b6daac712c9e 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_lrc.h
>> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.h
>> @@ -43,6 +43,7 @@ struct intel_engine_cs;
>>   #define          CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT   (1 << 0)
>>   #define   CTX_CTRL_RS_CTX_ENABLE               (1 << 1)
>>   #define          CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT      (1 << 2)
>> +#define          GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE     (1 << 8)
>>   #define RING_CONTEXT_STATUS_PTR(base)          _MMIO((base) + 0x3a0)
>>   #define RING_EXECLIST_SQ_CONTENTS(base)                _MMIO((base) + 0x510)
>>   #define RING_EXECLIST_CONTROL(base)            _MMIO((base) + 0x550)
>> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
>> index 91707558a0f5..ce97af484a32 100644
>> --- a/drivers/gpu/drm/i915/i915_perf.c
>> +++ b/drivers/gpu/drm/i915/i915_perf.c
>> @@ -1862,14 +1862,36 @@ get_oa_vma(struct i915_perf_stream *stream, struct i915_oa_config *oa_config)
>>          return i915_vma_get(oa_bo->vma);
>>   }
>>   
>> +static int gen12_emit_oar_config(struct i915_request *rq,
>> +                                struct intel_context *ce,
>> +                                bool enable)
>> +{
>> +       u32 *cs;
>> +
>> +       cs = intel_ring_begin(rq, 4);
>> +       if (IS_ERR(cs))
>> +               return PTR_ERR(cs);
>> +
>> +       *cs++ = MI_LOAD_REGISTER_IMM(1);
>> +       *cs++ = i915_mmio_reg_offset(RING_CONTEXT_CONTROL(ce->engine->mmio_base));
>> +       *cs++ = _MASKED_FIELD(GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE,
>> +                             enable ? GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE : 0);
>> +       *cs++ = MI_NOOP;
>> +
>> +       intel_ring_advance(rq, cs);
>> +
>> +       return 0;
>> +}
>> +
>>   static int emit_oa_config(struct i915_perf_stream *stream,
>>                            struct intel_context *ce)
>>   {
>> +       struct i915_oa_config *oa_config = stream->oa_config;
>>          struct i915_request *rq;
>>          struct i915_vma *vma;
>>          int err;
>>   
>> -       vma = get_oa_vma(stream, stream->oa_config);
>> +       vma = get_oa_vma(stream, oa_config);
>>          if (IS_ERR(vma))
>>                  return PTR_ERR(vma);
>>   
>> @@ -1891,6 +1913,17 @@ static int emit_oa_config(struct i915_perf_stream *stream,
>>          if (err)
>>                  goto err_add_request;
>>   
>> +       /*
>> +        * For Gen12, performance counters are context saved/restored.
>> +        * Only enable it for the context that requested this.
>> +        */
>> +       if (ce == stream->pinned_ctx && IS_GEN(stream->perf->i915, 12)) {
>> +               err = gen12_emit_oar_config(rq, ce, oa_config != NULL);
>> +               if (err)
>> +                       goto err_add_request;
>> +
>> +       }
> Do we need to do emit_oa_config(stream, NULL) in
> gen8_disable_metric_set()?
> -Chris
>
Oh crap... I must have pointed to the wrong function then...

I thought this was called from enable/disable_metric_set().


I guess the best thing would be to call this from 
gen12_enable/disable_metric_set().

But then we would need to create a request for the pinned_ctx.

Sorry :(


-Lionel

Patch
diff mbox series

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h
index 99dc576a4e25..b6daac712c9e 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.h
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.h
@@ -43,6 +43,7 @@  struct intel_engine_cs;
 #define	  CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT	(1 << 0)
 #define   CTX_CTRL_RS_CTX_ENABLE		(1 << 1)
 #define	  CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT	(1 << 2)
+#define	  GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE	(1 << 8)
 #define RING_CONTEXT_STATUS_PTR(base)		_MMIO((base) + 0x3a0)
 #define RING_EXECLIST_SQ_CONTENTS(base)		_MMIO((base) + 0x510)
 #define RING_EXECLIST_CONTROL(base)		_MMIO((base) + 0x550)
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 91707558a0f5..ce97af484a32 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1862,14 +1862,36 @@  get_oa_vma(struct i915_perf_stream *stream, struct i915_oa_config *oa_config)
 	return i915_vma_get(oa_bo->vma);
 }
 
+static int gen12_emit_oar_config(struct i915_request *rq,
+				 struct intel_context *ce,
+				 bool enable)
+{
+	u32 *cs;
+
+	cs = intel_ring_begin(rq, 4);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	*cs++ = MI_LOAD_REGISTER_IMM(1);
+	*cs++ = i915_mmio_reg_offset(RING_CONTEXT_CONTROL(ce->engine->mmio_base));
+	*cs++ = _MASKED_FIELD(GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE,
+			      enable ? GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE : 0);
+	*cs++ = MI_NOOP;
+
+	intel_ring_advance(rq, cs);
+
+	return 0;
+}
+
 static int emit_oa_config(struct i915_perf_stream *stream,
 			  struct intel_context *ce)
 {
+	struct i915_oa_config *oa_config = stream->oa_config;
 	struct i915_request *rq;
 	struct i915_vma *vma;
 	int err;
 
-	vma = get_oa_vma(stream, stream->oa_config);
+	vma = get_oa_vma(stream, oa_config);
 	if (IS_ERR(vma))
 		return PTR_ERR(vma);
 
@@ -1891,6 +1913,17 @@  static int emit_oa_config(struct i915_perf_stream *stream,
 	if (err)
 		goto err_add_request;
 
+	/*
+	 * For Gen12, performance counters are context saved/restored.
+	 * Only enable it for the context that requested this.
+	 */
+	if (ce == stream->pinned_ctx && IS_GEN(stream->perf->i915, 12)) {
+		err = gen12_emit_oar_config(rq, ce, oa_config != NULL);
+		if (err)
+			goto err_add_request;
+
+	}
+
 	err = rq->engine->emit_bb_start(rq,
 					vma->node.start, 0,
 					I915_DISPATCH_SECURE);