diff mbox series

[v5,1/4] drm/i915/perf: break OA config buffer object in 2

Message ID 20200409091706.690045-1-lionel.g.landwerlin@intel.com (mailing list archive)
State New, archived
Headers show
Series [v5,1/4] drm/i915/perf: break OA config buffer object in 2 | expand

Commit Message

Lionel Landwerlin April 9, 2020, 9:17 a.m. UTC
We want to enable performance monitoring on multiple contexts to cover
the Iris use case of using 2 GEM contexts (3D & compute).

So start by breaking the OA configuration BO which contains global &
per context register writes.

NOA muxes & OA configurations are global, while FLEXEU register
configurations are per context.

v2: Use an offset into the same VMA (Chris)

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
---
 drivers/gpu/drm/i915/i915_perf.c | 176 ++++++++++++++++++++-----------
 1 file changed, 116 insertions(+), 60 deletions(-)

Comments

Umesh Nerlige Ramappa April 13, 2020, 11:24 p.m. UTC | #1
Hi Lionel,

What's the implication of using separate contexts for 3d and compute on 
perf OA? Is it only context-filtering? If so, have you considered 
disabling context filtering with a parameter instead of actually 
filtering for specific contexts? Is this privileged use case?

Thanks,
Umesh

On Thu, Apr 09, 2020 at 12:17:03PM +0300, Lionel Landwerlin wrote:
>We want to enable performance monitoring on multiple contexts to cover
>the Iris use case of using 2 GEM contexts (3D & compute).
>
>So start by breaking the OA configuration BO which contains global &
>per context register writes.
>
>NOA muxes & OA configurations are global, while FLEXEU register
>configurations are per context.
>
>v2: Use an offset into the same VMA (Chris)
>
>Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
>---
> drivers/gpu/drm/i915/i915_perf.c | 176 ++++++++++++++++++++-----------
> 1 file changed, 116 insertions(+), 60 deletions(-)
>
>diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
>index 5cde3e4e7be6..d2183fd701a3 100644
>--- a/drivers/gpu/drm/i915/i915_perf.c
>+++ b/drivers/gpu/drm/i915/i915_perf.c
>@@ -372,6 +372,7 @@ struct i915_oa_config_bo {
>
> 	struct i915_oa_config *oa_config;
> 	struct i915_vma *vma;
>+	u32 per_context_offset;
> };
>
> static struct ctl_table_header *sysctl_header;
>@@ -1826,37 +1827,43 @@ static struct i915_oa_config_bo *
> alloc_oa_config_buffer(struct i915_perf_stream *stream,
> 		       struct i915_oa_config *oa_config)
> {
>-	struct drm_i915_gem_object *obj;
> 	struct i915_oa_config_bo *oa_bo;
>+	struct drm_i915_gem_object *obj;
> 	size_t config_length = 0;
>-	u32 *cs;
>+	u32 *cs_start, *cs;
> 	int err;
>
> 	oa_bo = kzalloc(sizeof(*oa_bo), GFP_KERNEL);
> 	if (!oa_bo)
> 		return ERR_PTR(-ENOMEM);
>
>+	/*
>+	 * Global configuration requires a jump into the NOA wait BO for it to
>+	 * apply.
>+	 */
> 	config_length += num_lri_dwords(oa_config->mux_regs_len);
> 	config_length += num_lri_dwords(oa_config->b_counter_regs_len);
>-	config_length += num_lri_dwords(oa_config->flex_regs_len);
> 	config_length += 3; /* MI_BATCH_BUFFER_START */
>+
>+	config_length += num_lri_dwords(oa_config->flex_regs_len);
>+	config_length += 1 /* MI_BATCH_BUFFER_END */;
>+
> 	config_length = ALIGN(sizeof(u32) * config_length, I915_GTT_PAGE_SIZE);
>
>-	obj = i915_gem_object_create_shmem(stream->perf->i915, config_length);
>+	obj = i915_gem_object_create_shmem(stream->perf->i915,
>+					   config_length);
> 	if (IS_ERR(obj)) {
> 		err = PTR_ERR(obj);
> 		goto err_free;
> 	}
>
>-	cs = i915_gem_object_pin_map(obj, I915_MAP_WB);
>-	if (IS_ERR(cs)) {
>-		err = PTR_ERR(cs);
>-		goto err_oa_bo;
>+	cs_start = i915_gem_object_pin_map(obj, I915_MAP_WB);
>+	if (IS_ERR(cs_start)) {
>+		err = PTR_ERR(cs_start);
>+		goto err_bo;
> 	}
>
>-	cs = write_cs_mi_lri(cs,
>-			     oa_config->mux_regs,
>-			     oa_config->mux_regs_len);
>+	cs = cs_start;
> 	cs = write_cs_mi_lri(cs,
> 			     oa_config->b_counter_regs,
> 			     oa_config->b_counter_regs_len);
>@@ -1871,6 +1878,14 @@ alloc_oa_config_buffer(struct i915_perf_stream *stream,
> 	*cs++ = i915_ggtt_offset(stream->noa_wait);
> 	*cs++ = 0;
>
>+	oa_bo->per_context_offset = 4 * (cs - cs_start);
>+
>+	cs = write_cs_mi_lri(cs,
>+			     oa_config->mux_regs,
>+			     oa_config->mux_regs_len);
>+
>+	*cs++ = MI_BATCH_BUFFER_END;
>+
> 	i915_gem_object_flush_map(obj);
> 	i915_gem_object_unpin_map(obj);
>
>@@ -1879,7 +1894,7 @@ alloc_oa_config_buffer(struct i915_perf_stream *stream,
> 				       NULL);
> 	if (IS_ERR(oa_bo->vma)) {
> 		err = PTR_ERR(oa_bo->vma);
>-		goto err_oa_bo;
>+		goto err_bo;
> 	}
>
> 	oa_bo->oa_config = i915_oa_config_get(oa_config);
>@@ -1887,15 +1902,15 @@ alloc_oa_config_buffer(struct i915_perf_stream *stream,
>
> 	return oa_bo;
>
>-err_oa_bo:
>+err_bo:
> 	i915_gem_object_put(obj);
> err_free:
> 	kfree(oa_bo);
> 	return ERR_PTR(err);
> }
>
>-static struct i915_vma *
>-get_oa_vma(struct i915_perf_stream *stream, struct i915_oa_config *oa_config)
>+static struct i915_oa_config_bo *
>+get_oa_bo(struct i915_perf_stream *stream, struct i915_oa_config *oa_config)
> {
> 	struct i915_oa_config_bo *oa_bo;
>
>@@ -1908,34 +1923,31 @@ get_oa_vma(struct i915_perf_stream *stream, struct i915_oa_config *oa_config)
> 		    memcmp(oa_bo->oa_config->uuid,
> 			   oa_config->uuid,
> 			   sizeof(oa_config->uuid)) == 0)
>-			goto out;
>+			return oa_bo;
> 	}
>
>-	oa_bo = alloc_oa_config_buffer(stream, oa_config);
>-	if (IS_ERR(oa_bo))
>-		return ERR_CAST(oa_bo);
>-
>-out:
>-	return i915_vma_get(oa_bo->vma);
>+	return alloc_oa_config_buffer(stream, oa_config);
> }
>
> static int
> emit_oa_config(struct i915_perf_stream *stream,
> 	       struct i915_oa_config *oa_config,
> 	       struct intel_context *ce,
>-	       struct i915_active *active)
>+	       struct i915_active *active,
>+	       bool global)
> {
>+	struct i915_oa_config_bo *oa_bo;
> 	struct i915_request *rq;
>-	struct i915_vma *vma;
>+	u64 vma_offset;
> 	int err;
>
>-	vma = get_oa_vma(stream, oa_config);
>-	if (IS_ERR(vma))
>-		return PTR_ERR(vma);
>+	oa_bo = get_oa_bo(stream, oa_config);
>+	if (IS_ERR(oa_bo))
>+		return PTR_ERR(oa_bo);
>
>-	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
>+	err = i915_vma_pin(oa_bo->vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
> 	if (err)
>-		goto err_vma_put;
>+		return err;
>
> 	intel_engine_pm_get(ce->engine);
> 	rq = i915_request_create(ce);
>@@ -1957,16 +1969,19 @@ emit_oa_config(struct i915_perf_stream *stream,
> 			goto err_add_request;
> 	}
>
>-	i915_vma_lock(vma);
>-	err = i915_request_await_object(rq, vma->obj, 0);
>+	i915_vma_lock(oa_bo->vma);
>+	err = i915_request_await_object(rq, oa_bo->vma->obj, 0);
> 	if (!err)
>-		err = i915_vma_move_to_active(vma, rq, 0);
>-	i915_vma_unlock(vma);
>+		err = i915_vma_move_to_active(oa_bo->vma, rq, 0);
>+	i915_vma_unlock(oa_bo->vma);
> 	if (err)
> 		goto err_add_request;
>
>-	err = rq->engine->emit_bb_start(rq,
>-					vma->node.start, 0,
>+	vma_offset = oa_bo->vma->node.start;
>+	if (!global)
>+		vma_offset += oa_bo->per_context_offset;
>+
>+	err = rq->engine->emit_bb_start(rq, vma_offset, 0,
> 					I915_DISPATCH_SECURE);
> 	if (err)
> 		goto err_add_request;
>@@ -1974,9 +1989,7 @@ emit_oa_config(struct i915_perf_stream *stream,
> err_add_request:
> 	i915_request_add(rq);
> err_vma_unpin:
>-	i915_vma_unpin(vma);
>-err_vma_put:
>-	i915_vma_put(vma);
>+	i915_vma_unpin(oa_bo->vma);
> 	return err;
> }
>
>@@ -1990,6 +2003,7 @@ hsw_enable_metric_set(struct i915_perf_stream *stream,
> 		      struct i915_active *active)
> {
> 	struct intel_uncore *uncore = stream->uncore;
>+	int err;
>
> 	/*
> 	 * PRM:
>@@ -2006,9 +2020,17 @@ hsw_enable_metric_set(struct i915_perf_stream *stream,
> 	intel_uncore_rmw(uncore, GEN6_UCGCTL1,
> 			 0, GEN6_CSUNIT_CLOCK_GATE_DISABLE);
>
>-	return emit_oa_config(stream,
>-			      stream->oa_config, oa_context(stream),
>-			      active);
>+	err = emit_oa_config(stream, stream->oa_config,
>+			     oa_context(stream),
>+			     active,
>+			     false /* global */);
>+	if (err)
>+		return err;
>+
>+	return emit_oa_config(stream, stream->oa_config,
>+			      oa_context(stream),
>+			      active,
>+			      true /* global */);
> }
>
> static void hsw_disable_metric_set(struct i915_perf_stream *stream)
>@@ -2419,7 +2441,7 @@ gen8_enable_metric_set(struct i915_perf_stream *stream,
> {
> 	struct intel_uncore *uncore = stream->uncore;
> 	struct i915_oa_config *oa_config = stream->oa_config;
>-	int ret;
>+	int err;
>
> 	/*
> 	 * We disable slice/unslice clock ratio change reports on SKL since
>@@ -2455,13 +2477,21 @@ gen8_enable_metric_set(struct i915_perf_stream *stream,
> 	 * to make sure all slices/subslices are ON before writing to NOA
> 	 * registers.
> 	 */
>-	ret = lrc_configure_all_contexts(stream, oa_config, active);
>-	if (ret)
>-		return ret;
>+	err = lrc_configure_all_contexts(stream, oa_config, active);
>+	if (err)
>+		return err;
>
>-	return emit_oa_config(stream,
>-			      stream->oa_config, oa_context(stream),
>-			      active);
>+	err = emit_oa_config(stream, oa_config,
>+			     oa_context(stream),
>+			     active,
>+			     false /* global */);
>+	if (err)
>+		return err;
>+
>+	return emit_oa_config(stream, stream->oa_config,
>+			      oa_context(stream),
>+			      active,
>+			      true /* global */);
> }
>
> static u32 oag_report_ctx_switches(const struct i915_perf_stream *stream)
>@@ -2507,9 +2537,9 @@ gen12_enable_metric_set(struct i915_perf_stream *stream,
> 		return ret;
>
> 	/*
>-	 * For Gen12, performance counters are context
>-	 * saved/restored. Only enable it for the context that
>-	 * requested this.
>+	 * For Gen12, performance counters are also context saved/restored on
>+	 * another set of performance registers. Configure the unit dealing
>+	 * with those.
> 	 */
> 	if (stream->ctx) {
> 		ret = gen12_configure_oar_context(stream, active);
>@@ -2517,9 +2547,17 @@ gen12_enable_metric_set(struct i915_perf_stream *stream,
> 			return ret;
> 	}
>
>-	return emit_oa_config(stream,
>-			      stream->oa_config, oa_context(stream),
>-			      active);
>+	ret = emit_oa_config(stream, oa_config,
>+			     oa_context(stream),
>+			     active,
>+			     false /* global */);
>+	if (ret)
>+		return ret;
>+
>+	return emit_oa_config(stream, stream->oa_config,
>+			      oa_context(stream),
>+			      active,
>+			      true /* global */);
> }
>
> static void gen8_disable_metric_set(struct i915_perf_stream *stream)
>@@ -3174,6 +3212,7 @@ static long i915_perf_config_locked(struct i915_perf_stream *stream,
> 				    unsigned long metrics_set)
> {
> 	struct i915_oa_config *config;
>+	struct i915_active *active = NULL;
> 	long ret = stream->oa_config->id;
>
> 	config = i915_perf_get_oa_config(stream->perf, metrics_set);
>@@ -3181,7 +3220,11 @@ static long i915_perf_config_locked(struct i915_perf_stream *stream,
> 		return -EINVAL;
>
> 	if (config != stream->oa_config) {
>-		int err;
>+		active = i915_active_create();
>+		if (!active) {
>+			ret = -ENOMEM;
>+			goto err_config;
>+		}
>
> 		/*
> 		 * If OA is bound to a specific context, emit the
>@@ -3192,13 +3235,26 @@ static long i915_perf_config_locked(struct i915_perf_stream *stream,
> 		 * When set globally, we use a low priority kernel context,
> 		 * so it will effectively take effect when idle.
> 		 */
>-		err = emit_oa_config(stream, config, oa_context(stream), NULL);
>-		if (!err)
>-			config = xchg(&stream->oa_config, config);
>-		else
>-			ret = err;
>+		ret = emit_oa_config(stream, config,
>+				     oa_context(stream),
>+				     active,
>+				     false /* global */);
>+		if (ret)
>+			goto err_active;
>+
>+		ret = emit_oa_config(stream, config,
>+				     oa_context(stream),
>+				     active,
>+				     true /* global */);
>+		if (ret)
>+			goto err_active;
>+
>+		config = xchg(&stream->oa_config, config);
> 	}
>
>+err_active:
>+	i915_active_put(active);
>+err_config:
> 	i915_oa_config_put(config);
>
> 	return ret;
>-- 
>2.26.0
>
>_______________________________________________
>Intel-gfx mailing list
>Intel-gfx@lists.freedesktop.org
>https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Lionel Landwerlin April 14, 2020, 9:21 a.m. UTC | #2
Yeah it's only filtering.

We need to pin the context to a particular HW tag so that filtering can 
work properly.

-Lionel

On 14/04/2020 02:24, Umesh Nerlige Ramappa wrote:
> Hi Lionel,
>
> What's the implication of using separate contexts for 3d and compute 
> on perf OA? Is it only context-filtering? If so, have you considered 
> disabling context filtering with a parameter instead of actually 
> filtering for specific contexts? Is this privileged use case?
>
> Thanks,
> Umesh
>
> On Thu, Apr 09, 2020 at 12:17:03PM +0300, Lionel Landwerlin wrote:
>> We want to enable performance monitoring on multiple contexts to cover
>> the Iris use case of using 2 GEM contexts (3D & compute).
>>
>> So start by breaking the OA configuration BO which contains global &
>> per context register writes.
>>
>> NOA muxes & OA configurations are global, while FLEXEU register
>> configurations are per context.
>>
>> v2: Use an offset into the same VMA (Chris)
>>
>> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
>> ---
>> drivers/gpu/drm/i915/i915_perf.c | 176 ++++++++++++++++++++-----------
>> 1 file changed, 116 insertions(+), 60 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_perf.c 
>> b/drivers/gpu/drm/i915/i915_perf.c
>> index 5cde3e4e7be6..d2183fd701a3 100644
>> --- a/drivers/gpu/drm/i915/i915_perf.c
>> +++ b/drivers/gpu/drm/i915/i915_perf.c
>> @@ -372,6 +372,7 @@ struct i915_oa_config_bo {
>>
>>     struct i915_oa_config *oa_config;
>>     struct i915_vma *vma;
>> +    u32 per_context_offset;
>> };
>>
>> static struct ctl_table_header *sysctl_header;
>> @@ -1826,37 +1827,43 @@ static struct i915_oa_config_bo *
>> alloc_oa_config_buffer(struct i915_perf_stream *stream,
>>                struct i915_oa_config *oa_config)
>> {
>> -    struct drm_i915_gem_object *obj;
>>     struct i915_oa_config_bo *oa_bo;
>> +    struct drm_i915_gem_object *obj;
>>     size_t config_length = 0;
>> -    u32 *cs;
>> +    u32 *cs_start, *cs;
>>     int err;
>>
>>     oa_bo = kzalloc(sizeof(*oa_bo), GFP_KERNEL);
>>     if (!oa_bo)
>>         return ERR_PTR(-ENOMEM);
>>
>> +    /*
>> +     * Global configuration requires a jump into the NOA wait BO for 
>> it to
>> +     * apply.
>> +     */
>>     config_length += num_lri_dwords(oa_config->mux_regs_len);
>>     config_length += num_lri_dwords(oa_config->b_counter_regs_len);
>> -    config_length += num_lri_dwords(oa_config->flex_regs_len);
>>     config_length += 3; /* MI_BATCH_BUFFER_START */
>> +
>> +    config_length += num_lri_dwords(oa_config->flex_regs_len);
>> +    config_length += 1 /* MI_BATCH_BUFFER_END */;
>> +
>>     config_length = ALIGN(sizeof(u32) * config_length, 
>> I915_GTT_PAGE_SIZE);
>>
>> -    obj = i915_gem_object_create_shmem(stream->perf->i915, 
>> config_length);
>> +    obj = i915_gem_object_create_shmem(stream->perf->i915,
>> +                       config_length);
>>     if (IS_ERR(obj)) {
>>         err = PTR_ERR(obj);
>>         goto err_free;
>>     }
>>
>> -    cs = i915_gem_object_pin_map(obj, I915_MAP_WB);
>> -    if (IS_ERR(cs)) {
>> -        err = PTR_ERR(cs);
>> -        goto err_oa_bo;
>> +    cs_start = i915_gem_object_pin_map(obj, I915_MAP_WB);
>> +    if (IS_ERR(cs_start)) {
>> +        err = PTR_ERR(cs_start);
>> +        goto err_bo;
>>     }
>>
>> -    cs = write_cs_mi_lri(cs,
>> -                 oa_config->mux_regs,
>> -                 oa_config->mux_regs_len);
>> +    cs = cs_start;
>>     cs = write_cs_mi_lri(cs,
>>                  oa_config->b_counter_regs,
>>                  oa_config->b_counter_regs_len);
>> @@ -1871,6 +1878,14 @@ alloc_oa_config_buffer(struct i915_perf_stream 
>> *stream,
>>     *cs++ = i915_ggtt_offset(stream->noa_wait);
>>     *cs++ = 0;
>>
>> +    oa_bo->per_context_offset = 4 * (cs - cs_start);
>> +
>> +    cs = write_cs_mi_lri(cs,
>> +                 oa_config->mux_regs,
>> +                 oa_config->mux_regs_len);
>> +
>> +    *cs++ = MI_BATCH_BUFFER_END;
>> +
>>     i915_gem_object_flush_map(obj);
>>     i915_gem_object_unpin_map(obj);
>>
>> @@ -1879,7 +1894,7 @@ alloc_oa_config_buffer(struct i915_perf_stream 
>> *stream,
>>                        NULL);
>>     if (IS_ERR(oa_bo->vma)) {
>>         err = PTR_ERR(oa_bo->vma);
>> -        goto err_oa_bo;
>> +        goto err_bo;
>>     }
>>
>>     oa_bo->oa_config = i915_oa_config_get(oa_config);
>> @@ -1887,15 +1902,15 @@ alloc_oa_config_buffer(struct 
>> i915_perf_stream *stream,
>>
>>     return oa_bo;
>>
>> -err_oa_bo:
>> +err_bo:
>>     i915_gem_object_put(obj);
>> err_free:
>>     kfree(oa_bo);
>>     return ERR_PTR(err);
>> }
>>
>> -static struct i915_vma *
>> -get_oa_vma(struct i915_perf_stream *stream, struct i915_oa_config 
>> *oa_config)
>> +static struct i915_oa_config_bo *
>> +get_oa_bo(struct i915_perf_stream *stream, struct i915_oa_config 
>> *oa_config)
>> {
>>     struct i915_oa_config_bo *oa_bo;
>>
>> @@ -1908,34 +1923,31 @@ get_oa_vma(struct i915_perf_stream *stream, 
>> struct i915_oa_config *oa_config)
>>             memcmp(oa_bo->oa_config->uuid,
>>                oa_config->uuid,
>>                sizeof(oa_config->uuid)) == 0)
>> -            goto out;
>> +            return oa_bo;
>>     }
>>
>> -    oa_bo = alloc_oa_config_buffer(stream, oa_config);
>> -    if (IS_ERR(oa_bo))
>> -        return ERR_CAST(oa_bo);
>> -
>> -out:
>> -    return i915_vma_get(oa_bo->vma);
>> +    return alloc_oa_config_buffer(stream, oa_config);
>> }
>>
>> static int
>> emit_oa_config(struct i915_perf_stream *stream,
>>            struct i915_oa_config *oa_config,
>>            struct intel_context *ce,
>> -           struct i915_active *active)
>> +           struct i915_active *active,
>> +           bool global)
>> {
>> +    struct i915_oa_config_bo *oa_bo;
>>     struct i915_request *rq;
>> -    struct i915_vma *vma;
>> +    u64 vma_offset;
>>     int err;
>>
>> -    vma = get_oa_vma(stream, oa_config);
>> -    if (IS_ERR(vma))
>> -        return PTR_ERR(vma);
>> +    oa_bo = get_oa_bo(stream, oa_config);
>> +    if (IS_ERR(oa_bo))
>> +        return PTR_ERR(oa_bo);
>>
>> -    err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
>> +    err = i915_vma_pin(oa_bo->vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
>>     if (err)
>> -        goto err_vma_put;
>> +        return err;
>>
>>     intel_engine_pm_get(ce->engine);
>>     rq = i915_request_create(ce);
>> @@ -1957,16 +1969,19 @@ emit_oa_config(struct i915_perf_stream *stream,
>>             goto err_add_request;
>>     }
>>
>> -    i915_vma_lock(vma);
>> -    err = i915_request_await_object(rq, vma->obj, 0);
>> +    i915_vma_lock(oa_bo->vma);
>> +    err = i915_request_await_object(rq, oa_bo->vma->obj, 0);
>>     if (!err)
>> -        err = i915_vma_move_to_active(vma, rq, 0);
>> -    i915_vma_unlock(vma);
>> +        err = i915_vma_move_to_active(oa_bo->vma, rq, 0);
>> +    i915_vma_unlock(oa_bo->vma);
>>     if (err)
>>         goto err_add_request;
>>
>> -    err = rq->engine->emit_bb_start(rq,
>> -                    vma->node.start, 0,
>> +    vma_offset = oa_bo->vma->node.start;
>> +    if (!global)
>> +        vma_offset += oa_bo->per_context_offset;
>> +
>> +    err = rq->engine->emit_bb_start(rq, vma_offset, 0,
>>                     I915_DISPATCH_SECURE);
>>     if (err)
>>         goto err_add_request;
>> @@ -1974,9 +1989,7 @@ emit_oa_config(struct i915_perf_stream *stream,
>> err_add_request:
>>     i915_request_add(rq);
>> err_vma_unpin:
>> -    i915_vma_unpin(vma);
>> -err_vma_put:
>> -    i915_vma_put(vma);
>> +    i915_vma_unpin(oa_bo->vma);
>>     return err;
>> }
>>
>> @@ -1990,6 +2003,7 @@ hsw_enable_metric_set(struct i915_perf_stream 
>> *stream,
>>               struct i915_active *active)
>> {
>>     struct intel_uncore *uncore = stream->uncore;
>> +    int err;
>>
>>     /*
>>      * PRM:
>> @@ -2006,9 +2020,17 @@ hsw_enable_metric_set(struct i915_perf_stream 
>> *stream,
>>     intel_uncore_rmw(uncore, GEN6_UCGCTL1,
>>              0, GEN6_CSUNIT_CLOCK_GATE_DISABLE);
>>
>> -    return emit_oa_config(stream,
>> -                  stream->oa_config, oa_context(stream),
>> -                  active);
>> +    err = emit_oa_config(stream, stream->oa_config,
>> +                 oa_context(stream),
>> +                 active,
>> +                 false /* global */);
>> +    if (err)
>> +        return err;
>> +
>> +    return emit_oa_config(stream, stream->oa_config,
>> +                  oa_context(stream),
>> +                  active,
>> +                  true /* global */);
>> }
>>
>> static void hsw_disable_metric_set(struct i915_perf_stream *stream)
>> @@ -2419,7 +2441,7 @@ gen8_enable_metric_set(struct i915_perf_stream 
>> *stream,
>> {
>>     struct intel_uncore *uncore = stream->uncore;
>>     struct i915_oa_config *oa_config = stream->oa_config;
>> -    int ret;
>> +    int err;
>>
>>     /*
>>      * We disable slice/unslice clock ratio change reports on SKL since
>> @@ -2455,13 +2477,21 @@ gen8_enable_metric_set(struct 
>> i915_perf_stream *stream,
>>      * to make sure all slices/subslices are ON before writing to NOA
>>      * registers.
>>      */
>> -    ret = lrc_configure_all_contexts(stream, oa_config, active);
>> -    if (ret)
>> -        return ret;
>> +    err = lrc_configure_all_contexts(stream, oa_config, active);
>> +    if (err)
>> +        return err;
>>
>> -    return emit_oa_config(stream,
>> -                  stream->oa_config, oa_context(stream),
>> -                  active);
>> +    err = emit_oa_config(stream, oa_config,
>> +                 oa_context(stream),
>> +                 active,
>> +                 false /* global */);
>> +    if (err)
>> +        return err;
>> +
>> +    return emit_oa_config(stream, stream->oa_config,
>> +                  oa_context(stream),
>> +                  active,
>> +                  true /* global */);
>> }
>>
>> static u32 oag_report_ctx_switches(const struct i915_perf_stream 
>> *stream)
>> @@ -2507,9 +2537,9 @@ gen12_enable_metric_set(struct i915_perf_stream 
>> *stream,
>>         return ret;
>>
>>     /*
>> -     * For Gen12, performance counters are context
>> -     * saved/restored. Only enable it for the context that
>> -     * requested this.
>> +     * For Gen12, performance counters are also context 
>> saved/restored on
>> +     * another set of performance registers. Configure the unit dealing
>> +     * with those.
>>      */
>>     if (stream->ctx) {
>>         ret = gen12_configure_oar_context(stream, active);
>> @@ -2517,9 +2547,17 @@ gen12_enable_metric_set(struct 
>> i915_perf_stream *stream,
>>             return ret;
>>     }
>>
>> -    return emit_oa_config(stream,
>> -                  stream->oa_config, oa_context(stream),
>> -                  active);
>> +    ret = emit_oa_config(stream, oa_config,
>> +                 oa_context(stream),
>> +                 active,
>> +                 false /* global */);
>> +    if (ret)
>> +        return ret;
>> +
>> +    return emit_oa_config(stream, stream->oa_config,
>> +                  oa_context(stream),
>> +                  active,
>> +                  true /* global */);
>> }
>>
>> static void gen8_disable_metric_set(struct i915_perf_stream *stream)
>> @@ -3174,6 +3212,7 @@ static long i915_perf_config_locked(struct 
>> i915_perf_stream *stream,
>>                     unsigned long metrics_set)
>> {
>>     struct i915_oa_config *config;
>> +    struct i915_active *active = NULL;
>>     long ret = stream->oa_config->id;
>>
>>     config = i915_perf_get_oa_config(stream->perf, metrics_set);
>> @@ -3181,7 +3220,11 @@ static long i915_perf_config_locked(struct 
>> i915_perf_stream *stream,
>>         return -EINVAL;
>>
>>     if (config != stream->oa_config) {
>> -        int err;
>> +        active = i915_active_create();
>> +        if (!active) {
>> +            ret = -ENOMEM;
>> +            goto err_config;
>> +        }
>>
>>         /*
>>          * If OA is bound to a specific context, emit the
>> @@ -3192,13 +3235,26 @@ static long i915_perf_config_locked(struct 
>> i915_perf_stream *stream,
>>          * When set globally, we use a low priority kernel context,
>>          * so it will effectively take effect when idle.
>>          */
>> -        err = emit_oa_config(stream, config, oa_context(stream), NULL);
>> -        if (!err)
>> -            config = xchg(&stream->oa_config, config);
>> -        else
>> -            ret = err;
>> +        ret = emit_oa_config(stream, config,
>> +                     oa_context(stream),
>> +                     active,
>> +                     false /* global */);
>> +        if (ret)
>> +            goto err_active;
>> +
>> +        ret = emit_oa_config(stream, config,
>> +                     oa_context(stream),
>> +                     active,
>> +                     true /* global */);
>> +        if (ret)
>> +            goto err_active;
>> +
>> +        config = xchg(&stream->oa_config, config);
>>     }
>>
>> +err_active:
>> +    i915_active_put(active);
>> +err_config:
>>     i915_oa_config_put(config);
>>
>>     return ret;
>> -- 
>> 2.26.0
>>
>> _______________________________________________
>> Intel-gfx mailing list
>> Intel-gfx@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 5cde3e4e7be6..d2183fd701a3 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -372,6 +372,7 @@  struct i915_oa_config_bo {
 
 	struct i915_oa_config *oa_config;
 	struct i915_vma *vma;
+	u32 per_context_offset;
 };
 
 static struct ctl_table_header *sysctl_header;
@@ -1826,37 +1827,43 @@  static struct i915_oa_config_bo *
 alloc_oa_config_buffer(struct i915_perf_stream *stream,
 		       struct i915_oa_config *oa_config)
 {
-	struct drm_i915_gem_object *obj;
 	struct i915_oa_config_bo *oa_bo;
+	struct drm_i915_gem_object *obj;
 	size_t config_length = 0;
-	u32 *cs;
+	u32 *cs_start, *cs;
 	int err;
 
 	oa_bo = kzalloc(sizeof(*oa_bo), GFP_KERNEL);
 	if (!oa_bo)
 		return ERR_PTR(-ENOMEM);
 
+	/*
+	 * Global configuration requires a jump into the NOA wait BO for it to
+	 * apply.
+	 */
 	config_length += num_lri_dwords(oa_config->mux_regs_len);
 	config_length += num_lri_dwords(oa_config->b_counter_regs_len);
-	config_length += num_lri_dwords(oa_config->flex_regs_len);
 	config_length += 3; /* MI_BATCH_BUFFER_START */
+
+	config_length += num_lri_dwords(oa_config->flex_regs_len);
+	config_length += 1 /* MI_BATCH_BUFFER_END */;
+
 	config_length = ALIGN(sizeof(u32) * config_length, I915_GTT_PAGE_SIZE);
 
-	obj = i915_gem_object_create_shmem(stream->perf->i915, config_length);
+	obj = i915_gem_object_create_shmem(stream->perf->i915,
+					   config_length);
 	if (IS_ERR(obj)) {
 		err = PTR_ERR(obj);
 		goto err_free;
 	}
 
-	cs = i915_gem_object_pin_map(obj, I915_MAP_WB);
-	if (IS_ERR(cs)) {
-		err = PTR_ERR(cs);
-		goto err_oa_bo;
+	cs_start = i915_gem_object_pin_map(obj, I915_MAP_WB);
+	if (IS_ERR(cs_start)) {
+		err = PTR_ERR(cs_start);
+		goto err_bo;
 	}
 
-	cs = write_cs_mi_lri(cs,
-			     oa_config->mux_regs,
-			     oa_config->mux_regs_len);
+	cs = cs_start;
 	cs = write_cs_mi_lri(cs,
 			     oa_config->b_counter_regs,
 			     oa_config->b_counter_regs_len);
@@ -1871,6 +1878,14 @@  alloc_oa_config_buffer(struct i915_perf_stream *stream,
 	*cs++ = i915_ggtt_offset(stream->noa_wait);
 	*cs++ = 0;
 
+	oa_bo->per_context_offset = 4 * (cs - cs_start);
+
+	cs = write_cs_mi_lri(cs,
+			     oa_config->mux_regs,
+			     oa_config->mux_regs_len);
+
+	*cs++ = MI_BATCH_BUFFER_END;
+
 	i915_gem_object_flush_map(obj);
 	i915_gem_object_unpin_map(obj);
 
@@ -1879,7 +1894,7 @@  alloc_oa_config_buffer(struct i915_perf_stream *stream,
 				       NULL);
 	if (IS_ERR(oa_bo->vma)) {
 		err = PTR_ERR(oa_bo->vma);
-		goto err_oa_bo;
+		goto err_bo;
 	}
 
 	oa_bo->oa_config = i915_oa_config_get(oa_config);
@@ -1887,15 +1902,15 @@  alloc_oa_config_buffer(struct i915_perf_stream *stream,
 
 	return oa_bo;
 
-err_oa_bo:
+err_bo:
 	i915_gem_object_put(obj);
 err_free:
 	kfree(oa_bo);
 	return ERR_PTR(err);
 }
 
-static struct i915_vma *
-get_oa_vma(struct i915_perf_stream *stream, struct i915_oa_config *oa_config)
+static struct i915_oa_config_bo *
+get_oa_bo(struct i915_perf_stream *stream, struct i915_oa_config *oa_config)
 {
 	struct i915_oa_config_bo *oa_bo;
 
@@ -1908,34 +1923,31 @@  get_oa_vma(struct i915_perf_stream *stream, struct i915_oa_config *oa_config)
 		    memcmp(oa_bo->oa_config->uuid,
 			   oa_config->uuid,
 			   sizeof(oa_config->uuid)) == 0)
-			goto out;
+			return oa_bo;
 	}
 
-	oa_bo = alloc_oa_config_buffer(stream, oa_config);
-	if (IS_ERR(oa_bo))
-		return ERR_CAST(oa_bo);
-
-out:
-	return i915_vma_get(oa_bo->vma);
+	return alloc_oa_config_buffer(stream, oa_config);
 }
 
 static int
 emit_oa_config(struct i915_perf_stream *stream,
 	       struct i915_oa_config *oa_config,
 	       struct intel_context *ce,
-	       struct i915_active *active)
+	       struct i915_active *active,
+	       bool global)
 {
+	struct i915_oa_config_bo *oa_bo;
 	struct i915_request *rq;
-	struct i915_vma *vma;
+	u64 vma_offset;
 	int err;
 
-	vma = get_oa_vma(stream, oa_config);
-	if (IS_ERR(vma))
-		return PTR_ERR(vma);
+	oa_bo = get_oa_bo(stream, oa_config);
+	if (IS_ERR(oa_bo))
+		return PTR_ERR(oa_bo);
 
-	err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
+	err = i915_vma_pin(oa_bo->vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
 	if (err)
-		goto err_vma_put;
+		return err;
 
 	intel_engine_pm_get(ce->engine);
 	rq = i915_request_create(ce);
@@ -1957,16 +1969,19 @@  emit_oa_config(struct i915_perf_stream *stream,
 			goto err_add_request;
 	}
 
-	i915_vma_lock(vma);
-	err = i915_request_await_object(rq, vma->obj, 0);
+	i915_vma_lock(oa_bo->vma);
+	err = i915_request_await_object(rq, oa_bo->vma->obj, 0);
 	if (!err)
-		err = i915_vma_move_to_active(vma, rq, 0);
-	i915_vma_unlock(vma);
+		err = i915_vma_move_to_active(oa_bo->vma, rq, 0);
+	i915_vma_unlock(oa_bo->vma);
 	if (err)
 		goto err_add_request;
 
-	err = rq->engine->emit_bb_start(rq,
-					vma->node.start, 0,
+	vma_offset = oa_bo->vma->node.start;
+	if (!global)
+		vma_offset += oa_bo->per_context_offset;
+
+	err = rq->engine->emit_bb_start(rq, vma_offset, 0,
 					I915_DISPATCH_SECURE);
 	if (err)
 		goto err_add_request;
@@ -1974,9 +1989,7 @@  emit_oa_config(struct i915_perf_stream *stream,
 err_add_request:
 	i915_request_add(rq);
 err_vma_unpin:
-	i915_vma_unpin(vma);
-err_vma_put:
-	i915_vma_put(vma);
+	i915_vma_unpin(oa_bo->vma);
 	return err;
 }
 
@@ -1990,6 +2003,7 @@  hsw_enable_metric_set(struct i915_perf_stream *stream,
 		      struct i915_active *active)
 {
 	struct intel_uncore *uncore = stream->uncore;
+	int err;
 
 	/*
 	 * PRM:
@@ -2006,9 +2020,17 @@  hsw_enable_metric_set(struct i915_perf_stream *stream,
 	intel_uncore_rmw(uncore, GEN6_UCGCTL1,
 			 0, GEN6_CSUNIT_CLOCK_GATE_DISABLE);
 
-	return emit_oa_config(stream,
-			      stream->oa_config, oa_context(stream),
-			      active);
+	err = emit_oa_config(stream, stream->oa_config,
+			     oa_context(stream),
+			     active,
+			     false /* global */);
+	if (err)
+		return err;
+
+	return emit_oa_config(stream, stream->oa_config,
+			      oa_context(stream),
+			      active,
+			      true /* global */);
 }
 
 static void hsw_disable_metric_set(struct i915_perf_stream *stream)
@@ -2419,7 +2441,7 @@  gen8_enable_metric_set(struct i915_perf_stream *stream,
 {
 	struct intel_uncore *uncore = stream->uncore;
 	struct i915_oa_config *oa_config = stream->oa_config;
-	int ret;
+	int err;
 
 	/*
 	 * We disable slice/unslice clock ratio change reports on SKL since
@@ -2455,13 +2477,21 @@  gen8_enable_metric_set(struct i915_perf_stream *stream,
 	 * to make sure all slices/subslices are ON before writing to NOA
 	 * registers.
 	 */
-	ret = lrc_configure_all_contexts(stream, oa_config, active);
-	if (ret)
-		return ret;
+	err = lrc_configure_all_contexts(stream, oa_config, active);
+	if (err)
+		return err;
 
-	return emit_oa_config(stream,
-			      stream->oa_config, oa_context(stream),
-			      active);
+	err = emit_oa_config(stream, oa_config,
+			     oa_context(stream),
+			     active,
+			     false /* global */);
+	if (err)
+		return err;
+
+	return emit_oa_config(stream, stream->oa_config,
+			      oa_context(stream),
+			      active,
+			      true /* global */);
 }
 
 static u32 oag_report_ctx_switches(const struct i915_perf_stream *stream)
@@ -2507,9 +2537,9 @@  gen12_enable_metric_set(struct i915_perf_stream *stream,
 		return ret;
 
 	/*
-	 * For Gen12, performance counters are context
-	 * saved/restored. Only enable it for the context that
-	 * requested this.
+	 * For Gen12, performance counters are also context saved/restored on
+	 * another set of performance registers. Configure the unit dealing
+	 * with those.
 	 */
 	if (stream->ctx) {
 		ret = gen12_configure_oar_context(stream, active);
@@ -2517,9 +2547,17 @@  gen12_enable_metric_set(struct i915_perf_stream *stream,
 			return ret;
 	}
 
-	return emit_oa_config(stream,
-			      stream->oa_config, oa_context(stream),
-			      active);
+	ret = emit_oa_config(stream, oa_config,
+			     oa_context(stream),
+			     active,
+			     false /* global */);
+	if (ret)
+		return ret;
+
+	return emit_oa_config(stream, stream->oa_config,
+			      oa_context(stream),
+			      active,
+			      true /* global */);
 }
 
 static void gen8_disable_metric_set(struct i915_perf_stream *stream)
@@ -3174,6 +3212,7 @@  static long i915_perf_config_locked(struct i915_perf_stream *stream,
 				    unsigned long metrics_set)
 {
 	struct i915_oa_config *config;
+	struct i915_active *active = NULL;
 	long ret = stream->oa_config->id;
 
 	config = i915_perf_get_oa_config(stream->perf, metrics_set);
@@ -3181,7 +3220,11 @@  static long i915_perf_config_locked(struct i915_perf_stream *stream,
 		return -EINVAL;
 
 	if (config != stream->oa_config) {
-		int err;
+		active = i915_active_create();
+		if (!active) {
+			ret = -ENOMEM;
+			goto err_config;
+		}
 
 		/*
 		 * If OA is bound to a specific context, emit the
@@ -3192,13 +3235,26 @@  static long i915_perf_config_locked(struct i915_perf_stream *stream,
 		 * When set globally, we use a low priority kernel context,
 		 * so it will effectively take effect when idle.
 		 */
-		err = emit_oa_config(stream, config, oa_context(stream), NULL);
-		if (!err)
-			config = xchg(&stream->oa_config, config);
-		else
-			ret = err;
+		ret = emit_oa_config(stream, config,
+				     oa_context(stream),
+				     active,
+				     false /* global */);
+		if (ret)
+			goto err_active;
+
+		ret = emit_oa_config(stream, config,
+				     oa_context(stream),
+				     active,
+				     true /* global */);
+		if (ret)
+			goto err_active;
+
+		config = xchg(&stream->oa_config, config);
 	}
 
+err_active:
+	i915_active_put(active);
+err_config:
 	i915_oa_config_put(config);
 
 	return ret;