
[01/19] drm/i915/perf: Fix OA filtering logic for GuC mode

Message ID 20220823204155.8178-2-umesh.nerlige.ramappa@intel.com (mailing list archive)
State New, archived
Series Add DG2 OA support

Commit Message

Umesh Nerlige Ramappa Aug. 23, 2022, 8:41 p.m. UTC
With GuC mode of submission, GuC is in control of defining the context id field
that is part of the OA reports. To filter reports, UMD and KMD must know what sw
context id was chosen by GuC. There is no interface between KMD and GuC to
determine this, so read the upper dword of EXECLIST_STATUS to filter/squash OA
reports for the specific context.

Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_lrc.h |   2 +
 drivers/gpu/drm/i915/i915_perf.c    | 141 ++++++++++++++++++++++++----
 2 files changed, 124 insertions(+), 19 deletions(-)
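
The scheme relies on the SRM in this patch being executed from the target
context's own ring: the store only runs while that context is active on the
engine, so the upper dword of EXECLIST_STATUS sampled at that moment holds
the context's own sw id. Once specific_ctx_id/specific_ctx_id_mask are
derived from it, squashing reports reduces to a masked compare. A minimal
sketch (the helper name is hypothetical; the dword-2 layout follows the
gen8+ OA report handling in i915_perf.c):

	/* Sketch: does this OA report belong to the filtered context? */
	static bool oa_report_ctx_matches(const u32 *report32,
					  u32 specific_ctx_id,
					  u32 specific_ctx_id_mask)
	{
		/* gen8+ OA reports carry the context id in dword 2 */
		u32 ctx_id = report32[2] & specific_ctx_id_mask;

		/*
		 * e.g. GuC mode, from the constants in this patch:
		 * mask = ((1U << 16) - 1) << (39 - 32) = 0x007fff80
		 */
		return ctx_id == specific_ctx_id;
	}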

Comments

Lionel Landwerlin Sept. 6, 2022, 2:33 p.m. UTC | #1
On 23/08/2022 23:41, Umesh Nerlige Ramappa wrote:
> With GuC mode of submission, GuC is in control of defining the context id field
> that is part of the OA reports. To filter reports, UMD and KMD must know what sw
> context id was chosen by GuC. There is not interface between KMD and GuC to
> determine this, so read the upper-dword of EXECLIST_STATUS to filter/squash OA
> reports for the specific context.
>
> Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>


I assume you checked with GuC that this doesn't change as the context is 
running?

With i915/execlist submission mode, we had to ask i915 to pin the 
sw_id/ctx_id.


If that's not the case then filtering is broken.


-Lionel


> ---
>   drivers/gpu/drm/i915/gt/intel_lrc.h |   2 +
>   drivers/gpu/drm/i915/i915_perf.c    | 141 ++++++++++++++++++++++++----
>   2 files changed, 124 insertions(+), 19 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h
> index a390f0813c8b..7111bae759f3 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.h
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.h
> @@ -110,6 +110,8 @@ enum {
>   #define XEHP_SW_CTX_ID_WIDTH			16
>   #define XEHP_SW_COUNTER_SHIFT			58
>   #define XEHP_SW_COUNTER_WIDTH			6
> +#define GEN12_GUC_SW_CTX_ID_SHIFT		39
> +#define GEN12_GUC_SW_CTX_ID_WIDTH		16
>   
>   static inline void lrc_runtime_start(struct intel_context *ce)
>   {
> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
> index f3c23fe9ad9c..735244a3aedd 100644
> --- a/drivers/gpu/drm/i915/i915_perf.c
> +++ b/drivers/gpu/drm/i915/i915_perf.c
> @@ -1233,6 +1233,125 @@ static struct intel_context *oa_pin_context(struct i915_perf_stream *stream)
>   	return stream->pinned_ctx;
>   }
>   
> +static int
> +__store_reg_to_mem(struct i915_request *rq, i915_reg_t reg, u32 ggtt_offset)
> +{
> +	u32 *cs, cmd;
> +
> +	cmd = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
> +	if (GRAPHICS_VER(rq->engine->i915) >= 8)
> +		cmd++;
> +
> +	cs = intel_ring_begin(rq, 4);
> +	if (IS_ERR(cs))
> +		return PTR_ERR(cs);
> +
> +	*cs++ = cmd;
> +	*cs++ = i915_mmio_reg_offset(reg);
> +	*cs++ = ggtt_offset;
> +	*cs++ = 0;
> +
> +	intel_ring_advance(rq, cs);
> +
> +	return 0;
> +}
> +
> +static int
> +__read_reg(struct intel_context *ce, i915_reg_t reg, u32 ggtt_offset)
> +{
> +	struct i915_request *rq;
> +	int err;
> +
> +	rq = i915_request_create(ce);
> +	if (IS_ERR(rq))
> +		return PTR_ERR(rq);
> +
> +	i915_request_get(rq);
> +
> +	err = __store_reg_to_mem(rq, reg, ggtt_offset);
> +
> +	i915_request_add(rq);
> +	if (!err && i915_request_wait(rq, 0, HZ / 2) < 0)
> +		err = -ETIME;
> +
> +	i915_request_put(rq);
> +
> +	return err;
> +}
> +
> +static int
> +gen12_guc_sw_ctx_id(struct intel_context *ce, u32 *ctx_id)
> +{
> +	struct i915_vma *scratch;
> +	u32 *val;
> +	int err;
> +
> +	scratch = __vm_create_scratch_for_read_pinned(&ce->engine->gt->ggtt->vm, 4);
> +	if (IS_ERR(scratch))
> +		return PTR_ERR(scratch);
> +
> +	err = i915_vma_sync(scratch);
> +	if (err)
> +		goto err_scratch;
> +
> +	err = __read_reg(ce, RING_EXECLIST_STATUS_HI(ce->engine->mmio_base),
> +			 i915_ggtt_offset(scratch));
> +	if (err)
> +		goto err_scratch;
> +
> +	val = i915_gem_object_pin_map_unlocked(scratch->obj, I915_MAP_WB);
> +	if (IS_ERR(val)) {
> +		err = PTR_ERR(val);
> +		goto err_scratch;
> +	}
> +
> +	*ctx_id = *val;
> +	i915_gem_object_unpin_map(scratch->obj);
> +
> +err_scratch:
> +	i915_vma_unpin_and_release(&scratch, 0);
> +	return err;
> +}
> +
> +/*
> + * For execlist mode of submission, pick an unused context id
> + * 0 - (NUM_CONTEXT_TAG -1) are used by other contexts
> + * XXX_MAX_CONTEXT_HW_ID is used by idle context
> + *
> + * For GuC mode of submission read context id from the upper dword of the
> + * EXECLIST_STATUS register.
> + */
> +static int gen12_get_render_context_id(struct i915_perf_stream *stream)
> +{
> +	u32 ctx_id, mask;
> +	int ret;
> +
> +	if (intel_engine_uses_guc(stream->engine)) {
> +		ret = gen12_guc_sw_ctx_id(stream->pinned_ctx, &ctx_id);
> +		if (ret)
> +			return ret;
> +
> +		mask = ((1U << GEN12_GUC_SW_CTX_ID_WIDTH) - 1) <<
> +			(GEN12_GUC_SW_CTX_ID_SHIFT - 32);
> +	} else if (GRAPHICS_VER_FULL(stream->engine->i915) >= IP_VER(12, 50)) {
> +		ctx_id = (XEHP_MAX_CONTEXT_HW_ID - 1) <<
> +			(XEHP_SW_CTX_ID_SHIFT - 32);
> +
> +		mask = ((1U << XEHP_SW_CTX_ID_WIDTH) - 1) <<
> +			(XEHP_SW_CTX_ID_SHIFT - 32);
> +	} else {
> +		ctx_id = (GEN12_MAX_CONTEXT_HW_ID - 1) <<
> +			 (GEN11_SW_CTX_ID_SHIFT - 32);
> +
> +		mask = ((1U << GEN11_SW_CTX_ID_WIDTH) - 1) <<
> +			(GEN11_SW_CTX_ID_SHIFT - 32);
> +	}
> +	stream->specific_ctx_id = ctx_id & mask;
> +	stream->specific_ctx_id_mask = mask;
> +
> +	return 0;
> +}
> +
>   /**
>    * oa_get_render_ctx_id - determine and hold ctx hw id
>    * @stream: An i915-perf stream opened for OA metrics
> @@ -1246,6 +1365,7 @@ static struct intel_context *oa_pin_context(struct i915_perf_stream *stream)
>   static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
>   {
>   	struct intel_context *ce;
> +	int ret = 0;
>   
>   	ce = oa_pin_context(stream);
>   	if (IS_ERR(ce))
> @@ -1292,24 +1412,7 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
>   
>   	case 11:
>   	case 12:
> -		if (GRAPHICS_VER_FULL(ce->engine->i915) >= IP_VER(12, 50)) {
> -			stream->specific_ctx_id_mask =
> -				((1U << XEHP_SW_CTX_ID_WIDTH) - 1) <<
> -				(XEHP_SW_CTX_ID_SHIFT - 32);
> -			stream->specific_ctx_id =
> -				(XEHP_MAX_CONTEXT_HW_ID - 1) <<
> -				(XEHP_SW_CTX_ID_SHIFT - 32);
> -		} else {
> -			stream->specific_ctx_id_mask =
> -				((1U << GEN11_SW_CTX_ID_WIDTH) - 1) << (GEN11_SW_CTX_ID_SHIFT - 32);
> -			/*
> -			 * Pick an unused context id
> -			 * 0 - BITS_PER_LONG are used by other contexts
> -			 * GEN12_MAX_CONTEXT_HW_ID (0x7ff) is used by idle context
> -			 */
> -			stream->specific_ctx_id =
> -				(GEN12_MAX_CONTEXT_HW_ID - 1) << (GEN11_SW_CTX_ID_SHIFT - 32);
> -		}
> +		ret = gen12_get_render_context_id(stream);
>   		break;
>   
>   	default:
> @@ -1323,7 +1426,7 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
>   		stream->specific_ctx_id,
>   		stream->specific_ctx_id_mask);
>   
> -	return 0;
> +	return ret;
>   }
>   
>   /**
Umesh Nerlige Ramappa Sept. 6, 2022, 5:39 p.m. UTC | #2
On Tue, Sep 06, 2022 at 05:33:00PM +0300, Lionel Landwerlin wrote:
>On 23/08/2022 23:41, Umesh Nerlige Ramappa wrote:
>>With GuC mode of submission, GuC is in control of defining the context id field
>>that is part of the OA reports. To filter reports, UMD and KMD must know what sw
>>context id was chosen by GuC. There is not interface between KMD and GuC to
>>determine this, so read the upper-dword of EXECLIST_STATUS to filter/squash OA
>>reports for the specific context.
>>
>>Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
>
>
>I assume you checked with GuC that this doesn't change as the context 
>is running?

Correct.

>
>With i915/execlist submission mode, we had to ask i915 to pin the 
>sw_id/ctx_id.
>

From the GuC perspective, the context id can change once KMD de-registers
the context, and that will not happen while the context is in use.

Thanks,
Umesh

>
>If that's not the case then filtering is broken.
>
>
>-Lionel
>
>
>>---
>>  drivers/gpu/drm/i915/gt/intel_lrc.h |   2 +
>>  drivers/gpu/drm/i915/i915_perf.c    | 141 ++++++++++++++++++++++++----
>>  2 files changed, 124 insertions(+), 19 deletions(-)
>>
>>diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h
>>index a390f0813c8b..7111bae759f3 100644
>>--- a/drivers/gpu/drm/i915/gt/intel_lrc.h
>>+++ b/drivers/gpu/drm/i915/gt/intel_lrc.h
>>@@ -110,6 +110,8 @@ enum {
>>  #define XEHP_SW_CTX_ID_WIDTH			16
>>  #define XEHP_SW_COUNTER_SHIFT			58
>>  #define XEHP_SW_COUNTER_WIDTH			6
>>+#define GEN12_GUC_SW_CTX_ID_SHIFT		39
>>+#define GEN12_GUC_SW_CTX_ID_WIDTH		16
>>  static inline void lrc_runtime_start(struct intel_context *ce)
>>  {
>>diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
>>index f3c23fe9ad9c..735244a3aedd 100644
>>--- a/drivers/gpu/drm/i915/i915_perf.c
>>+++ b/drivers/gpu/drm/i915/i915_perf.c
>>@@ -1233,6 +1233,125 @@ static struct intel_context *oa_pin_context(struct i915_perf_stream *stream)
>>  	return stream->pinned_ctx;
>>  }
>>+static int
>>+__store_reg_to_mem(struct i915_request *rq, i915_reg_t reg, u32 ggtt_offset)
>>+{
>>+	u32 *cs, cmd;
>>+
>>+	cmd = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
>>+	if (GRAPHICS_VER(rq->engine->i915) >= 8)
>>+		cmd++;
>>+
>>+	cs = intel_ring_begin(rq, 4);
>>+	if (IS_ERR(cs))
>>+		return PTR_ERR(cs);
>>+
>>+	*cs++ = cmd;
>>+	*cs++ = i915_mmio_reg_offset(reg);
>>+	*cs++ = ggtt_offset;
>>+	*cs++ = 0;
>>+
>>+	intel_ring_advance(rq, cs);
>>+
>>+	return 0;
>>+}
>>+
>>+static int
>>+__read_reg(struct intel_context *ce, i915_reg_t reg, u32 ggtt_offset)
>>+{
>>+	struct i915_request *rq;
>>+	int err;
>>+
>>+	rq = i915_request_create(ce);
>>+	if (IS_ERR(rq))
>>+		return PTR_ERR(rq);
>>+
>>+	i915_request_get(rq);
>>+
>>+	err = __store_reg_to_mem(rq, reg, ggtt_offset);
>>+
>>+	i915_request_add(rq);
>>+	if (!err && i915_request_wait(rq, 0, HZ / 2) < 0)
>>+		err = -ETIME;
>>+
>>+	i915_request_put(rq);
>>+
>>+	return err;
>>+}
>>+
>>+static int
>>+gen12_guc_sw_ctx_id(struct intel_context *ce, u32 *ctx_id)
>>+{
>>+	struct i915_vma *scratch;
>>+	u32 *val;
>>+	int err;
>>+
>>+	scratch = __vm_create_scratch_for_read_pinned(&ce->engine->gt->ggtt->vm, 4);
>>+	if (IS_ERR(scratch))
>>+		return PTR_ERR(scratch);
>>+
>>+	err = i915_vma_sync(scratch);
>>+	if (err)
>>+		goto err_scratch;
>>+
>>+	err = __read_reg(ce, RING_EXECLIST_STATUS_HI(ce->engine->mmio_base),
>>+			 i915_ggtt_offset(scratch));
>>+	if (err)
>>+		goto err_scratch;
>>+
>>+	val = i915_gem_object_pin_map_unlocked(scratch->obj, I915_MAP_WB);
>>+	if (IS_ERR(val)) {
>>+		err = PTR_ERR(val);
>>+		goto err_scratch;
>>+	}
>>+
>>+	*ctx_id = *val;
>>+	i915_gem_object_unpin_map(scratch->obj);
>>+
>>+err_scratch:
>>+	i915_vma_unpin_and_release(&scratch, 0);
>>+	return err;
>>+}
>>+
>>+/*
>>+ * For execlist mode of submission, pick an unused context id
>>+ * 0 - (NUM_CONTEXT_TAG -1) are used by other contexts
>>+ * XXX_MAX_CONTEXT_HW_ID is used by idle context
>>+ *
>>+ * For GuC mode of submission read context id from the upper dword of the
>>+ * EXECLIST_STATUS register.
>>+ */
>>+static int gen12_get_render_context_id(struct i915_perf_stream *stream)
>>+{
>>+	u32 ctx_id, mask;
>>+	int ret;
>>+
>>+	if (intel_engine_uses_guc(stream->engine)) {
>>+		ret = gen12_guc_sw_ctx_id(stream->pinned_ctx, &ctx_id);
>>+		if (ret)
>>+			return ret;
>>+
>>+		mask = ((1U << GEN12_GUC_SW_CTX_ID_WIDTH) - 1) <<
>>+			(GEN12_GUC_SW_CTX_ID_SHIFT - 32);
>>+	} else if (GRAPHICS_VER_FULL(stream->engine->i915) >= IP_VER(12, 50)) {
>>+		ctx_id = (XEHP_MAX_CONTEXT_HW_ID - 1) <<
>>+			(XEHP_SW_CTX_ID_SHIFT - 32);
>>+
>>+		mask = ((1U << XEHP_SW_CTX_ID_WIDTH) - 1) <<
>>+			(XEHP_SW_CTX_ID_SHIFT - 32);
>>+	} else {
>>+		ctx_id = (GEN12_MAX_CONTEXT_HW_ID - 1) <<
>>+			 (GEN11_SW_CTX_ID_SHIFT - 32);
>>+
>>+		mask = ((1U << GEN11_SW_CTX_ID_WIDTH) - 1) <<
>>+			(GEN11_SW_CTX_ID_SHIFT - 32);
>>+	}
>>+	stream->specific_ctx_id = ctx_id & mask;
>>+	stream->specific_ctx_id_mask = mask;
>>+
>>+	return 0;
>>+}
>>+
>>  /**
>>   * oa_get_render_ctx_id - determine and hold ctx hw id
>>   * @stream: An i915-perf stream opened for OA metrics
>>@@ -1246,6 +1365,7 @@ static struct intel_context *oa_pin_context(struct i915_perf_stream *stream)
>>  static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
>>  {
>>  	struct intel_context *ce;
>>+	int ret = 0;
>>  	ce = oa_pin_context(stream);
>>  	if (IS_ERR(ce))
>>@@ -1292,24 +1412,7 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
>>  	case 11:
>>  	case 12:
>>-		if (GRAPHICS_VER_FULL(ce->engine->i915) >= IP_VER(12, 50)) {
>>-			stream->specific_ctx_id_mask =
>>-				((1U << XEHP_SW_CTX_ID_WIDTH) - 1) <<
>>-				(XEHP_SW_CTX_ID_SHIFT - 32);
>>-			stream->specific_ctx_id =
>>-				(XEHP_MAX_CONTEXT_HW_ID - 1) <<
>>-				(XEHP_SW_CTX_ID_SHIFT - 32);
>>-		} else {
>>-			stream->specific_ctx_id_mask =
>>-				((1U << GEN11_SW_CTX_ID_WIDTH) - 1) << (GEN11_SW_CTX_ID_SHIFT - 32);
>>-			/*
>>-			 * Pick an unused context id
>>-			 * 0 - BITS_PER_LONG are used by other contexts
>>-			 * GEN12_MAX_CONTEXT_HW_ID (0x7ff) is used by idle context
>>-			 */
>>-			stream->specific_ctx_id =
>>-				(GEN12_MAX_CONTEXT_HW_ID - 1) << (GEN11_SW_CTX_ID_SHIFT - 32);
>>-		}
>>+		ret = gen12_get_render_context_id(stream);
>>  		break;
>>  	default:
>>@@ -1323,7 +1426,7 @@ static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
>>  		stream->specific_ctx_id,
>>  		stream->specific_ctx_id_mask);
>>-	return 0;
>>+	return ret;
>>  }
>>  /**
>
>
Lionel Landwerlin Sept. 6, 2022, 6:39 p.m. UTC | #3
On 06/09/2022 20:39, Umesh Nerlige Ramappa wrote:
> On Tue, Sep 06, 2022 at 05:33:00PM +0300, Lionel Landwerlin wrote:
>> On 23/08/2022 23:41, Umesh Nerlige Ramappa wrote:
>>> With GuC mode of submission, GuC is in control of defining the 
>>> context id field
>>> that is part of the OA reports. To filter reports, UMD and KMD must 
>>> know what sw
>>> context id was chosen by GuC. There is not interface between KMD and 
>>> GuC to
>>> determine this, so read the upper-dword of EXECLIST_STATUS to 
>>> filter/squash OA
>>> reports for the specific context.
>>>
>>> Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
>>
>>
>> I assume you checked with GuC that this doesn't change as the context 
>> is running?
>
> Correct.
>
>>
>> With i915/execlist submission mode, we had to ask i915 to pin the 
>> sw_id/ctx_id.
>>
>
> From GuC perspective, the context id can change once KMD de-registers 
> the context and that will not happen while the context is in use.
>
> Thanks,
> Umesh


Thanks Umesh,


Maybe I should have been more precise in my question:


Can the ID change while the i915-perf stream is opened?

Because the ID not changing while the context is running makes sense.

But since the number of available IDs is limited to 2k or something on
Gfx12, it's possible the GuC has to reuse IDs if too many apps want to
run while i915-perf is active and filtering.


-Lionel


>
>>
>> If that's not the case then filtering is broken.
>>
>>
>> -Lionel
>>
>>
>>> ---
>>>  drivers/gpu/drm/i915/gt/intel_lrc.h |   2 +
>>>  drivers/gpu/drm/i915/i915_perf.c    | 141 ++++++++++++++++++++++++----
>>>  2 files changed, 124 insertions(+), 19 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h 
>>> b/drivers/gpu/drm/i915/gt/intel_lrc.h
>>> index a390f0813c8b..7111bae759f3 100644
>>> --- a/drivers/gpu/drm/i915/gt/intel_lrc.h
>>> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.h
>>> @@ -110,6 +110,8 @@ enum {
>>>  #define XEHP_SW_CTX_ID_WIDTH            16
>>>  #define XEHP_SW_COUNTER_SHIFT            58
>>>  #define XEHP_SW_COUNTER_WIDTH            6
>>> +#define GEN12_GUC_SW_CTX_ID_SHIFT        39
>>> +#define GEN12_GUC_SW_CTX_ID_WIDTH        16
>>>  static inline void lrc_runtime_start(struct intel_context *ce)
>>>  {
>>> diff --git a/drivers/gpu/drm/i915/i915_perf.c 
>>> b/drivers/gpu/drm/i915/i915_perf.c
>>> index f3c23fe9ad9c..735244a3aedd 100644
>>> --- a/drivers/gpu/drm/i915/i915_perf.c
>>> +++ b/drivers/gpu/drm/i915/i915_perf.c
>>> @@ -1233,6 +1233,125 @@ static struct intel_context 
>>> *oa_pin_context(struct i915_perf_stream *stream)
>>>      return stream->pinned_ctx;
>>>  }
>>> +static int
>>> +__store_reg_to_mem(struct i915_request *rq, i915_reg_t reg, u32 
>>> ggtt_offset)
>>> +{
>>> +    u32 *cs, cmd;
>>> +
>>> +    cmd = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
>>> +    if (GRAPHICS_VER(rq->engine->i915) >= 8)
>>> +        cmd++;
>>> +
>>> +    cs = intel_ring_begin(rq, 4);
>>> +    if (IS_ERR(cs))
>>> +        return PTR_ERR(cs);
>>> +
>>> +    *cs++ = cmd;
>>> +    *cs++ = i915_mmio_reg_offset(reg);
>>> +    *cs++ = ggtt_offset;
>>> +    *cs++ = 0;
>>> +
>>> +    intel_ring_advance(rq, cs);
>>> +
>>> +    return 0;
>>> +}
>>> +
>>> +static int
>>> +__read_reg(struct intel_context *ce, i915_reg_t reg, u32 ggtt_offset)
>>> +{
>>> +    struct i915_request *rq;
>>> +    int err;
>>> +
>>> +    rq = i915_request_create(ce);
>>> +    if (IS_ERR(rq))
>>> +        return PTR_ERR(rq);
>>> +
>>> +    i915_request_get(rq);
>>> +
>>> +    err = __store_reg_to_mem(rq, reg, ggtt_offset);
>>> +
>>> +    i915_request_add(rq);
>>> +    if (!err && i915_request_wait(rq, 0, HZ / 2) < 0)
>>> +        err = -ETIME;
>>> +
>>> +    i915_request_put(rq);
>>> +
>>> +    return err;
>>> +}
>>> +
>>> +static int
>>> +gen12_guc_sw_ctx_id(struct intel_context *ce, u32 *ctx_id)
>>> +{
>>> +    struct i915_vma *scratch;
>>> +    u32 *val;
>>> +    int err;
>>> +
>>> +    scratch = 
>>> __vm_create_scratch_for_read_pinned(&ce->engine->gt->ggtt->vm, 4);
>>> +    if (IS_ERR(scratch))
>>> +        return PTR_ERR(scratch);
>>> +
>>> +    err = i915_vma_sync(scratch);
>>> +    if (err)
>>> +        goto err_scratch;
>>> +
>>> +    err = __read_reg(ce, 
>>> RING_EXECLIST_STATUS_HI(ce->engine->mmio_base),
>>> +             i915_ggtt_offset(scratch));
>>> +    if (err)
>>> +        goto err_scratch;
>>> +
>>> +    val = i915_gem_object_pin_map_unlocked(scratch->obj, I915_MAP_WB);
>>> +    if (IS_ERR(val)) {
>>> +        err = PTR_ERR(val);
>>> +        goto err_scratch;
>>> +    }
>>> +
>>> +    *ctx_id = *val;
>>> +    i915_gem_object_unpin_map(scratch->obj);
>>> +
>>> +err_scratch:
>>> +    i915_vma_unpin_and_release(&scratch, 0);
>>> +    return err;
>>> +}
>>> +
>>> +/*
>>> + * For execlist mode of submission, pick an unused context id
>>> + * 0 - (NUM_CONTEXT_TAG -1) are used by other contexts
>>> + * XXX_MAX_CONTEXT_HW_ID is used by idle context
>>> + *
>>> + * For GuC mode of submission read context id from the upper dword 
>>> of the
>>> + * EXECLIST_STATUS register.
>>> + */
>>> +static int gen12_get_render_context_id(struct i915_perf_stream 
>>> *stream)
>>> +{
>>> +    u32 ctx_id, mask;
>>> +    int ret;
>>> +
>>> +    if (intel_engine_uses_guc(stream->engine)) {
>>> +        ret = gen12_guc_sw_ctx_id(stream->pinned_ctx, &ctx_id);
>>> +        if (ret)
>>> +            return ret;
>>> +
>>> +        mask = ((1U << GEN12_GUC_SW_CTX_ID_WIDTH) - 1) <<
>>> +            (GEN12_GUC_SW_CTX_ID_SHIFT - 32);
>>> +    } else if (GRAPHICS_VER_FULL(stream->engine->i915) >= 
>>> IP_VER(12, 50)) {
>>> +        ctx_id = (XEHP_MAX_CONTEXT_HW_ID - 1) <<
>>> +            (XEHP_SW_CTX_ID_SHIFT - 32);
>>> +
>>> +        mask = ((1U << XEHP_SW_CTX_ID_WIDTH) - 1) <<
>>> +            (XEHP_SW_CTX_ID_SHIFT - 32);
>>> +    } else {
>>> +        ctx_id = (GEN12_MAX_CONTEXT_HW_ID - 1) <<
>>> +             (GEN11_SW_CTX_ID_SHIFT - 32);
>>> +
>>> +        mask = ((1U << GEN11_SW_CTX_ID_WIDTH) - 1) <<
>>> +            (GEN11_SW_CTX_ID_SHIFT - 32);
>>> +    }
>>> +    stream->specific_ctx_id = ctx_id & mask;
>>> +    stream->specific_ctx_id_mask = mask;
>>> +
>>> +    return 0;
>>> +}
>>> +
>>>  /**
>>>   * oa_get_render_ctx_id - determine and hold ctx hw id
>>>   * @stream: An i915-perf stream opened for OA metrics
>>> @@ -1246,6 +1365,7 @@ static struct intel_context 
>>> *oa_pin_context(struct i915_perf_stream *stream)
>>>  static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
>>>  {
>>>      struct intel_context *ce;
>>> +    int ret = 0;
>>>      ce = oa_pin_context(stream);
>>>      if (IS_ERR(ce))
>>> @@ -1292,24 +1412,7 @@ static int oa_get_render_ctx_id(struct 
>>> i915_perf_stream *stream)
>>>      case 11:
>>>      case 12:
>>> -        if (GRAPHICS_VER_FULL(ce->engine->i915) >= IP_VER(12, 50)) {
>>> -            stream->specific_ctx_id_mask =
>>> -                ((1U << XEHP_SW_CTX_ID_WIDTH) - 1) <<
>>> -                (XEHP_SW_CTX_ID_SHIFT - 32);
>>> -            stream->specific_ctx_id =
>>> -                (XEHP_MAX_CONTEXT_HW_ID - 1) <<
>>> -                (XEHP_SW_CTX_ID_SHIFT - 32);
>>> -        } else {
>>> -            stream->specific_ctx_id_mask =
>>> -                ((1U << GEN11_SW_CTX_ID_WIDTH) - 1) << 
>>> (GEN11_SW_CTX_ID_SHIFT - 32);
>>> -            /*
>>> -             * Pick an unused context id
>>> -             * 0 - BITS_PER_LONG are used by other contexts
>>> -             * GEN12_MAX_CONTEXT_HW_ID (0x7ff) is used by idle context
>>> -             */
>>> -            stream->specific_ctx_id =
>>> -                (GEN12_MAX_CONTEXT_HW_ID - 1) << 
>>> (GEN11_SW_CTX_ID_SHIFT - 32);
>>> -        }
>>> +        ret = gen12_get_render_context_id(stream);
>>>          break;
>>>      default:
>>> @@ -1323,7 +1426,7 @@ static int oa_get_render_ctx_id(struct 
>>> i915_perf_stream *stream)
>>>          stream->specific_ctx_id,
>>>          stream->specific_ctx_id_mask);
>>> -    return 0;
>>> +    return ret;
>>>  }
>>>  /**
>>
>>
Dixit, Ashutosh Sept. 9, 2022, 11:47 p.m. UTC | #4
On Tue, 23 Aug 2022 13:41:37 -0700, Umesh Nerlige Ramappa wrote:
>

Hi Umesh,

> With GuC mode of submission, GuC is in control of defining the context id field
> that is part of the OA reports. To filter reports, UMD and KMD must know what sw
> context id was chosen by GuC. There is not interface between KMD and GuC to
> determine this, so read the upper-dword of EXECLIST_STATUS to filter/squash OA
> reports for the specific context.

Do you think it is worth defining an interface for GuC to return the sw
ctx_id it will be using for a ctx, say at ctx registration time?

The scheme implemented in this patch to read the ctx_id is certainly very
clever, at least to me. But as Lionel was saying, is it an agreed-upon
immutable interface? If it is, we can go with this patch.

(Though even then we will need to maintain this code even if a future
GuC FW is changed to return the ctx_id, in order to preserve backwards
compatibility with previous GuC versions. So maybe better to have a real
interface between GuC and KMD earlier rather than later?)

Also a couple of general comments below.

>
> Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
> ---
>  drivers/gpu/drm/i915/gt/intel_lrc.h |   2 +
>  drivers/gpu/drm/i915/i915_perf.c    | 141 ++++++++++++++++++++++++----
>  2 files changed, 124 insertions(+), 19 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h
> index a390f0813c8b..7111bae759f3 100644
> --- a/drivers/gpu/drm/i915/gt/intel_lrc.h
> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.h
> @@ -110,6 +110,8 @@ enum {
>  #define XEHP_SW_CTX_ID_WIDTH			16
>  #define XEHP_SW_COUNTER_SHIFT			58
>  #define XEHP_SW_COUNTER_WIDTH			6
> +#define GEN12_GUC_SW_CTX_ID_SHIFT		39
> +#define GEN12_GUC_SW_CTX_ID_WIDTH		16
>
>  static inline void lrc_runtime_start(struct intel_context *ce)
>  {
> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
> index f3c23fe9ad9c..735244a3aedd 100644
> --- a/drivers/gpu/drm/i915/i915_perf.c
> +++ b/drivers/gpu/drm/i915/i915_perf.c
> @@ -1233,6 +1233,125 @@ static struct intel_context *oa_pin_context(struct i915_perf_stream *stream)
>	return stream->pinned_ctx;
>  }
>
> +static int
> +__store_reg_to_mem(struct i915_request *rq, i915_reg_t reg, u32 ggtt_offset)
> +{
> +	u32 *cs, cmd;
> +
> +	cmd = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
> +	if (GRAPHICS_VER(rq->engine->i915) >= 8)
> +		cmd++;
> +
> +	cs = intel_ring_begin(rq, 4);
> +	if (IS_ERR(cs))
> +		return PTR_ERR(cs);
> +
> +	*cs++ = cmd;
> +	*cs++ = i915_mmio_reg_offset(reg);
> +	*cs++ = ggtt_offset;
> +	*cs++ = 0;
> +
> +	intel_ring_advance(rq, cs);
> +
> +	return 0;
> +}
> +
> +static int
> +__read_reg(struct intel_context *ce, i915_reg_t reg, u32 ggtt_offset)
> +{
> +	struct i915_request *rq;
> +	int err;
> +
> +	rq = i915_request_create(ce);
> +	if (IS_ERR(rq))
> +		return PTR_ERR(rq);
> +
> +	i915_request_get(rq);
> +
> +	err = __store_reg_to_mem(rq, reg, ggtt_offset);
> +
> +	i915_request_add(rq);
> +	if (!err && i915_request_wait(rq, 0, HZ / 2) < 0)
> +		err = -ETIME;
> +
> +	i915_request_put(rq);
> +
> +	return err;
> +}
> +
> +static int
> +gen12_guc_sw_ctx_id(struct intel_context *ce, u32 *ctx_id)
> +{
> +	struct i915_vma *scratch;
> +	u32 *val;
> +	int err;
> +
> +	scratch = __vm_create_scratch_for_read_pinned(&ce->engine->gt->ggtt->vm, 4);
> +	if (IS_ERR(scratch))
> +		return PTR_ERR(scratch);
> +
> +	err = i915_vma_sync(scratch);
> +	if (err)
> +		goto err_scratch;
> +
> +	err = __read_reg(ce, RING_EXECLIST_STATUS_HI(ce->engine->mmio_base),
> +			 i915_ggtt_offset(scratch));

Actually RING_EXECLIST_STATUS_HI is an MMIO register, so it can be read
using, say, ENGINE_READ/intel_uncore_read. The only issue is how to read it
while this ctx is scheduled, which is cleverly solved by the scheme above.
But I am not sure if there is any other simpler way to do it.
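
For comparison, a direct MMIO read would be something like the sketch
below; it is only meaningful at an instant when the target context happens
to be running on the engine, which is exactly what executing the SRM from
the context's own ring guarantees and a bare register read does not:

	/* Sketch only: valid only while @ce is running on the engine */
	u32 ctx_id_hi = intel_uncore_read(ce->engine->uncore,
					  RING_EXECLIST_STATUS_HI(ce->engine->mmio_base));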

> +	if (err)
> +		goto err_scratch;
> +
> +	val = i915_gem_object_pin_map_unlocked(scratch->obj, I915_MAP_WB);
> +	if (IS_ERR(val)) {
> +		err = PTR_ERR(val);
> +		goto err_scratch;
> +	}
> +
> +	*ctx_id = *val;
> +	i915_gem_object_unpin_map(scratch->obj);
> +
> +err_scratch:
> +	i915_vma_unpin_and_release(&scratch, 0);
> +	return err;
> +}
> +
> +/*
> + * For execlist mode of submission, pick an unused context id
> + * 0 - (NUM_CONTEXT_TAG -1) are used by other contexts
> + * XXX_MAX_CONTEXT_HW_ID is used by idle context
> + *
> + * For GuC mode of submission read context id from the upper dword of the
> + * EXECLIST_STATUS register.
> + */
> +static int gen12_get_render_context_id(struct i915_perf_stream *stream)
> +{
> +	u32 ctx_id, mask;
> +	int ret;
> +
> +	if (intel_engine_uses_guc(stream->engine)) {
> +		ret = gen12_guc_sw_ctx_id(stream->pinned_ctx, &ctx_id);
> +		if (ret)
> +			return ret;
> +
> +		mask = ((1U << GEN12_GUC_SW_CTX_ID_WIDTH) - 1) <<
> +			(GEN12_GUC_SW_CTX_ID_SHIFT - 32);
> +	} else if (GRAPHICS_VER_FULL(stream->engine->i915) >= IP_VER(12, 50)) {
> +		ctx_id = (XEHP_MAX_CONTEXT_HW_ID - 1) <<
> +			(XEHP_SW_CTX_ID_SHIFT - 32);
> +
> +		mask = ((1U << XEHP_SW_CTX_ID_WIDTH) - 1) <<
> +			(XEHP_SW_CTX_ID_SHIFT - 32);
> +	} else {
> +		ctx_id = (GEN12_MAX_CONTEXT_HW_ID - 1) <<
> +			 (GEN11_SW_CTX_ID_SHIFT - 32);
> +
> +		mask = ((1U << GEN11_SW_CTX_ID_WIDTH) - 1) <<
> +			(GEN11_SW_CTX_ID_SHIFT - 32);

Previously I missed that these ctx_ids for the non-GuC cases are just
constants. How does it work in these cases?

Thanks.
--
Ashutosh
Dixit, Ashutosh Sept. 13, 2022, 3:08 a.m. UTC | #5
On Fri, 09 Sep 2022 16:47:36 -0700, Dixit, Ashutosh wrote:
>
> On Tue, 23 Aug 2022 13:41:37 -0700, Umesh Nerlige Ramappa wrote:
> >
>
> Hi Umesh,
>
> > With GuC mode of submission, GuC is in control of defining the context id field
> > that is part of the OA reports. To filter reports, UMD and KMD must know what sw
> > context id was chosen by GuC. There is not interface between KMD and GuC to
> > determine this, so read the upper-dword of EXECLIST_STATUS to filter/squash OA
> > reports for the specific context.
>
> Do you think it is worth defining an interface for GuC to return the sw
> ctx_id it will be using for a ctx, say at ctx registration time?

Umesh, I came across these in GuC documentation:

guc_pcv1_context_parameters_set_h2g_data_t::context_id
guc_pcv2_context_parameters_set_h2g_data_t::context_id

Also, in the code, prepare_context_registration_info_v70 has 'ctx_id =
ce->guc_id.id', which seems to be assigned in new_guc_id. So I am wondering
if this is what we need and whether we already have it?

Thanks.
--
Ashutosh
Umesh Nerlige Ramappa Sept. 14, 2022, 10:26 p.m. UTC | #6
On Tue, Sep 06, 2022 at 09:39:33PM +0300, Lionel Landwerlin wrote:
>On 06/09/2022 20:39, Umesh Nerlige Ramappa wrote:
>>On Tue, Sep 06, 2022 at 05:33:00PM +0300, Lionel Landwerlin wrote:
>>>On 23/08/2022 23:41, Umesh Nerlige Ramappa wrote:
>>>>With GuC mode of submission, GuC is in control of defining the 
>>>>context id field
>>>>that is part of the OA reports. To filter reports, UMD and KMD 
>>>>must know what sw
>>>>context id was chosen by GuC. There is not interface between KMD 
>>>>and GuC to
>>>>determine this, so read the upper-dword of EXECLIST_STATUS to 
>>>>filter/squash OA
>>>>reports for the specific context.
>>>>
>>>>Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
>>>
>>>
>>>I assume you checked with GuC that this doesn't change as the 
>>>context is running?
>>
>>Correct.
>>
>>>
>>>With i915/execlist submission mode, we had to ask i915 to pin the 
>>>sw_id/ctx_id.
>>>
>>
>>From GuC perspective, the context id can change once KMD 
>>de-registers the context and that will not happen while the context 
>>is in use.
>>
>>Thanks,
>>Umesh
>
>
>Thanks Umesh,
>
>
>Maybe I should have been more precise in my question :
>
>
>Can the ID change while the i915-perf stream is opened?
>
>Because the ID not changing while the context is running makes sense.
>
>But since the number of available IDs is limited to 2k or something on 
>Gfx12, it's possible the GuC has to reuse IDs if too many apps want to 
>run during the period of time while i915-perf is active and filtering.
>

Available GuC ids are 64k, with 4k reserved for multi-LRC, so GuC may
have to reuse ids once 60k ids are used up.

Thanks,
Umesh

>
>-Lionel
>
>
>>
>>>
>>>If that's not the case then filtering is broken.
>>>
>>>
>>>-Lionel
>>>
>>>
>>>>---
>>>> drivers/gpu/drm/i915/gt/intel_lrc.h |   2 +
>>>> drivers/gpu/drm/i915/i915_perf.c    | 141 ++++++++++++++++++++++++----
>>>> 2 files changed, 124 insertions(+), 19 deletions(-)
>>>>
>>>>diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h 
>>>>b/drivers/gpu/drm/i915/gt/intel_lrc.h
>>>>index a390f0813c8b..7111bae759f3 100644
>>>>--- a/drivers/gpu/drm/i915/gt/intel_lrc.h
>>>>+++ b/drivers/gpu/drm/i915/gt/intel_lrc.h
>>>>@@ -110,6 +110,8 @@ enum {
>>>> #define XEHP_SW_CTX_ID_WIDTH            16
>>>> #define XEHP_SW_COUNTER_SHIFT            58
>>>> #define XEHP_SW_COUNTER_WIDTH            6
>>>>+#define GEN12_GUC_SW_CTX_ID_SHIFT        39
>>>>+#define GEN12_GUC_SW_CTX_ID_WIDTH        16
>>>> static inline void lrc_runtime_start(struct intel_context *ce)
>>>> {
>>>>diff --git a/drivers/gpu/drm/i915/i915_perf.c 
>>>>b/drivers/gpu/drm/i915/i915_perf.c
>>>>index f3c23fe9ad9c..735244a3aedd 100644
>>>>--- a/drivers/gpu/drm/i915/i915_perf.c
>>>>+++ b/drivers/gpu/drm/i915/i915_perf.c
>>>>@@ -1233,6 +1233,125 @@ static struct intel_context 
>>>>*oa_pin_context(struct i915_perf_stream *stream)
>>>>     return stream->pinned_ctx;
>>>> }
>>>>+static int
>>>>+__store_reg_to_mem(struct i915_request *rq, i915_reg_t reg, u32 
>>>>ggtt_offset)
>>>>+{
>>>>+    u32 *cs, cmd;
>>>>+
>>>>+    cmd = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
>>>>+    if (GRAPHICS_VER(rq->engine->i915) >= 8)
>>>>+        cmd++;
>>>>+
>>>>+    cs = intel_ring_begin(rq, 4);
>>>>+    if (IS_ERR(cs))
>>>>+        return PTR_ERR(cs);
>>>>+
>>>>+    *cs++ = cmd;
>>>>+    *cs++ = i915_mmio_reg_offset(reg);
>>>>+    *cs++ = ggtt_offset;
>>>>+    *cs++ = 0;
>>>>+
>>>>+    intel_ring_advance(rq, cs);
>>>>+
>>>>+    return 0;
>>>>+}
>>>>+
>>>>+static int
>>>>+__read_reg(struct intel_context *ce, i915_reg_t reg, u32 ggtt_offset)
>>>>+{
>>>>+    struct i915_request *rq;
>>>>+    int err;
>>>>+
>>>>+    rq = i915_request_create(ce);
>>>>+    if (IS_ERR(rq))
>>>>+        return PTR_ERR(rq);
>>>>+
>>>>+    i915_request_get(rq);
>>>>+
>>>>+    err = __store_reg_to_mem(rq, reg, ggtt_offset);
>>>>+
>>>>+    i915_request_add(rq);
>>>>+    if (!err && i915_request_wait(rq, 0, HZ / 2) < 0)
>>>>+        err = -ETIME;
>>>>+
>>>>+    i915_request_put(rq);
>>>>+
>>>>+    return err;
>>>>+}
>>>>+
>>>>+static int
>>>>+gen12_guc_sw_ctx_id(struct intel_context *ce, u32 *ctx_id)
>>>>+{
>>>>+    struct i915_vma *scratch;
>>>>+    u32 *val;
>>>>+    int err;
>>>>+
>>>>+    scratch = 
>>>>__vm_create_scratch_for_read_pinned(&ce->engine->gt->ggtt->vm, 
>>>>4);
>>>>+    if (IS_ERR(scratch))
>>>>+        return PTR_ERR(scratch);
>>>>+
>>>>+    err = i915_vma_sync(scratch);
>>>>+    if (err)
>>>>+        goto err_scratch;
>>>>+
>>>>+    err = __read_reg(ce, 
>>>>RING_EXECLIST_STATUS_HI(ce->engine->mmio_base),
>>>>+             i915_ggtt_offset(scratch));
>>>>+    if (err)
>>>>+        goto err_scratch;
>>>>+
>>>>+    val = i915_gem_object_pin_map_unlocked(scratch->obj, I915_MAP_WB);
>>>>+    if (IS_ERR(val)) {
>>>>+        err = PTR_ERR(val);
>>>>+        goto err_scratch;
>>>>+    }
>>>>+
>>>>+    *ctx_id = *val;
>>>>+    i915_gem_object_unpin_map(scratch->obj);
>>>>+
>>>>+err_scratch:
>>>>+    i915_vma_unpin_and_release(&scratch, 0);
>>>>+    return err;
>>>>+}
>>>>+
>>>>+/*
>>>>+ * For execlist mode of submission, pick an unused context id
>>>>+ * 0 - (NUM_CONTEXT_TAG -1) are used by other contexts
>>>>+ * XXX_MAX_CONTEXT_HW_ID is used by idle context
>>>>+ *
>>>>+ * For GuC mode of submission read context id from the upper 
>>>>dword of the
>>>>+ * EXECLIST_STATUS register.
>>>>+ */
>>>>+static int gen12_get_render_context_id(struct i915_perf_stream 
>>>>*stream)
>>>>+{
>>>>+    u32 ctx_id, mask;
>>>>+    int ret;
>>>>+
>>>>+    if (intel_engine_uses_guc(stream->engine)) {
>>>>+        ret = gen12_guc_sw_ctx_id(stream->pinned_ctx, &ctx_id);
>>>>+        if (ret)
>>>>+            return ret;
>>>>+
>>>>+        mask = ((1U << GEN12_GUC_SW_CTX_ID_WIDTH) - 1) <<
>>>>+            (GEN12_GUC_SW_CTX_ID_SHIFT - 32);
>>>>+    } else if (GRAPHICS_VER_FULL(stream->engine->i915) >= 
>>>>IP_VER(12, 50)) {
>>>>+        ctx_id = (XEHP_MAX_CONTEXT_HW_ID - 1) <<
>>>>+            (XEHP_SW_CTX_ID_SHIFT - 32);
>>>>+
>>>>+        mask = ((1U << XEHP_SW_CTX_ID_WIDTH) - 1) <<
>>>>+            (XEHP_SW_CTX_ID_SHIFT - 32);
>>>>+    } else {
>>>>+        ctx_id = (GEN12_MAX_CONTEXT_HW_ID - 1) <<
>>>>+             (GEN11_SW_CTX_ID_SHIFT - 32);
>>>>+
>>>>+        mask = ((1U << GEN11_SW_CTX_ID_WIDTH) - 1) <<
>>>>+            (GEN11_SW_CTX_ID_SHIFT - 32);
>>>>+    }
>>>>+    stream->specific_ctx_id = ctx_id & mask;
>>>>+    stream->specific_ctx_id_mask = mask;
>>>>+
>>>>+    return 0;
>>>>+}
>>>>+
>>>> /**
>>>>  * oa_get_render_ctx_id - determine and hold ctx hw id
>>>>  * @stream: An i915-perf stream opened for OA metrics
>>>>@@ -1246,6 +1365,7 @@ static struct intel_context 
>>>>*oa_pin_context(struct i915_perf_stream *stream)
>>>> static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
>>>> {
>>>>     struct intel_context *ce;
>>>>+    int ret = 0;
>>>>     ce = oa_pin_context(stream);
>>>>     if (IS_ERR(ce))
>>>>@@ -1292,24 +1412,7 @@ static int oa_get_render_ctx_id(struct 
>>>>i915_perf_stream *stream)
>>>>     case 11:
>>>>     case 12:
>>>>-        if (GRAPHICS_VER_FULL(ce->engine->i915) >= IP_VER(12, 50)) {
>>>>-            stream->specific_ctx_id_mask =
>>>>-                ((1U << XEHP_SW_CTX_ID_WIDTH) - 1) <<
>>>>-                (XEHP_SW_CTX_ID_SHIFT - 32);
>>>>-            stream->specific_ctx_id =
>>>>-                (XEHP_MAX_CONTEXT_HW_ID - 1) <<
>>>>-                (XEHP_SW_CTX_ID_SHIFT - 32);
>>>>-        } else {
>>>>-            stream->specific_ctx_id_mask =
>>>>-                ((1U << GEN11_SW_CTX_ID_WIDTH) - 1) << 
>>>>(GEN11_SW_CTX_ID_SHIFT - 32);
>>>>-            /*
>>>>-             * Pick an unused context id
>>>>-             * 0 - BITS_PER_LONG are used by other contexts
>>>>-             * GEN12_MAX_CONTEXT_HW_ID (0x7ff) is used by idle context
>>>>-             */
>>>>-            stream->specific_ctx_id =
>>>>-                (GEN12_MAX_CONTEXT_HW_ID - 1) << 
>>>>(GEN11_SW_CTX_ID_SHIFT - 32);
>>>>-        }
>>>>+        ret = gen12_get_render_context_id(stream);
>>>>         break;
>>>>     default:
>>>>@@ -1323,7 +1426,7 @@ static int oa_get_render_ctx_id(struct 
>>>>i915_perf_stream *stream)
>>>>         stream->specific_ctx_id,
>>>>         stream->specific_ctx_id_mask);
>>>>-    return 0;
>>>>+    return ret;
>>>> }
>>>> /**
>>>
>>>
>
Umesh Nerlige Ramappa Sept. 14, 2022, 11:13 p.m. UTC | #7
On Wed, Sep 14, 2022 at 03:26:15PM -0700, Umesh Nerlige Ramappa wrote:
>On Tue, Sep 06, 2022 at 09:39:33PM +0300, Lionel Landwerlin wrote:
>>On 06/09/2022 20:39, Umesh Nerlige Ramappa wrote:
>>>On Tue, Sep 06, 2022 at 05:33:00PM +0300, Lionel Landwerlin wrote:
>>>>On 23/08/2022 23:41, Umesh Nerlige Ramappa wrote:
>>>>>With GuC mode of submission, GuC is in control of defining the 
>>>>>context id field
>>>>>that is part of the OA reports. To filter reports, UMD and KMD 
>>>>>must know what sw
>>>>>context id was chosen by GuC. There is not interface between 
>>>>>KMD and GuC to
>>>>>determine this, so read the upper-dword of EXECLIST_STATUS to 
>>>>>filter/squash OA
>>>>>reports for the specific context.
>>>>>
>>>>>Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
>>>>
>>>>
>>>>I assume you checked with GuC that this doesn't change as the 
>>>>context is running?
>>>
>>>Correct.
>>>
>>>>
>>>>With i915/execlist submission mode, we had to ask i915 to pin 
>>>>the sw_id/ctx_id.
>>>>
>>>
>>>From GuC perspective, the context id can change once KMD 
>>>de-registers the context and that will not happen while the 
>>>context is in use.
>>>
>>>Thanks,
>>>Umesh
>>
>>
>>Thanks Umesh,
>>
>>
>>Maybe I should have been more precise in my question :
>>
>>
>>Can the ID change while the i915-perf stream is opened?
>>
>>Because the ID not changing while the context is running makes sense.
>>
>>But since the number of available IDs is limited to 2k or something 
>>on Gfx12, it's possible the GuC has to reuse IDs if too many apps 
>>want to run during the period of time while i915-perf is active and 
>>filtering.
>>
>
>available guc ids are 64k with 4k reserved for multi-lrc, so GuC may 
>have to reuse ids once 60k ids are used up.

Spoke to the GuC team again: if there are a lot of contexts (> 60K)
running, there is a possibility of the context id being recycled. In
that case, the capture would be broken. I will track this as a separate
JIRA and follow up on a solution.

From the OA use case perspective, are we interested in monitoring just
one hardware context? If we make sure this context is not stolen, are
we good?

Thanks,
Umesh

>
>Thanks,
>Umesh
>
>>
>>-Lionel
>>
>>
>>>
>>>>
>>>>If that's not the case then filtering is broken.
>>>>
>>>>
>>>>-Lionel
>>>>
>>>>
>>>>>---
>>>>> drivers/gpu/drm/i915/gt/intel_lrc.h |   2 +
>>>>> drivers/gpu/drm/i915/i915_perf.c    | 141 ++++++++++++++++++++++++----
>>>>> 2 files changed, 124 insertions(+), 19 deletions(-)
>>>>>
>>>>>diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h 
>>>>>b/drivers/gpu/drm/i915/gt/intel_lrc.h
>>>>>index a390f0813c8b..7111bae759f3 100644
>>>>>--- a/drivers/gpu/drm/i915/gt/intel_lrc.h
>>>>>+++ b/drivers/gpu/drm/i915/gt/intel_lrc.h
>>>>>@@ -110,6 +110,8 @@ enum {
>>>>> #define XEHP_SW_CTX_ID_WIDTH            16
>>>>> #define XEHP_SW_COUNTER_SHIFT            58
>>>>> #define XEHP_SW_COUNTER_WIDTH            6
>>>>>+#define GEN12_GUC_SW_CTX_ID_SHIFT        39
>>>>>+#define GEN12_GUC_SW_CTX_ID_WIDTH        16
>>>>> static inline void lrc_runtime_start(struct intel_context *ce)
>>>>> {
>>>>>diff --git a/drivers/gpu/drm/i915/i915_perf.c 
>>>>>b/drivers/gpu/drm/i915/i915_perf.c
>>>>>index f3c23fe9ad9c..735244a3aedd 100644
>>>>>--- a/drivers/gpu/drm/i915/i915_perf.c
>>>>>+++ b/drivers/gpu/drm/i915/i915_perf.c
>>>>>@@ -1233,6 +1233,125 @@ static struct intel_context 
>>>>>*oa_pin_context(struct i915_perf_stream *stream)
>>>>>     return stream->pinned_ctx;
>>>>> }
>>>>>+static int
>>>>>+__store_reg_to_mem(struct i915_request *rq, i915_reg_t reg, 
>>>>>u32 ggtt_offset)
>>>>>+{
>>>>>+    u32 *cs, cmd;
>>>>>+
>>>>>+    cmd = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
>>>>>+    if (GRAPHICS_VER(rq->engine->i915) >= 8)
>>>>>+        cmd++;
>>>>>+
>>>>>+    cs = intel_ring_begin(rq, 4);
>>>>>+    if (IS_ERR(cs))
>>>>>+        return PTR_ERR(cs);
>>>>>+
>>>>>+    *cs++ = cmd;
>>>>>+    *cs++ = i915_mmio_reg_offset(reg);
>>>>>+    *cs++ = ggtt_offset;
>>>>>+    *cs++ = 0;
>>>>>+
>>>>>+    intel_ring_advance(rq, cs);
>>>>>+
>>>>>+    return 0;
>>>>>+}
>>>>>+
>>>>>+static int
>>>>>+__read_reg(struct intel_context *ce, i915_reg_t reg, u32 ggtt_offset)
>>>>>+{
>>>>>+    struct i915_request *rq;
>>>>>+    int err;
>>>>>+
>>>>>+    rq = i915_request_create(ce);
>>>>>+    if (IS_ERR(rq))
>>>>>+        return PTR_ERR(rq);
>>>>>+
>>>>>+    i915_request_get(rq);
>>>>>+
>>>>>+    err = __store_reg_to_mem(rq, reg, ggtt_offset);
>>>>>+
>>>>>+    i915_request_add(rq);
>>>>>+    if (!err && i915_request_wait(rq, 0, HZ / 2) < 0)
>>>>>+        err = -ETIME;
>>>>>+
>>>>>+    i915_request_put(rq);
>>>>>+
>>>>>+    return err;
>>>>>+}
>>>>>+
>>>>>+static int
>>>>>+gen12_guc_sw_ctx_id(struct intel_context *ce, u32 *ctx_id)
>>>>>+{
>>>>>+    struct i915_vma *scratch;
>>>>>+    u32 *val;
>>>>>+    int err;
>>>>>+
>>>>>+    scratch = 
>>>>>__vm_create_scratch_for_read_pinned(&ce->engine->gt->ggtt->vm, 
>>>>>4);
>>>>>+    if (IS_ERR(scratch))
>>>>>+        return PTR_ERR(scratch);
>>>>>+
>>>>>+    err = i915_vma_sync(scratch);
>>>>>+    if (err)
>>>>>+        goto err_scratch;
>>>>>+
>>>>>+    err = __read_reg(ce, 
>>>>>RING_EXECLIST_STATUS_HI(ce->engine->mmio_base),
>>>>>+             i915_ggtt_offset(scratch));
>>>>>+    if (err)
>>>>>+        goto err_scratch;
>>>>>+
>>>>>+    val = i915_gem_object_pin_map_unlocked(scratch->obj, I915_MAP_WB);
>>>>>+    if (IS_ERR(val)) {
>>>>>+        err = PTR_ERR(val);
>>>>>+        goto err_scratch;
>>>>>+    }
>>>>>+
>>>>>+    *ctx_id = *val;
>>>>>+    i915_gem_object_unpin_map(scratch->obj);
>>>>>+
>>>>>+err_scratch:
>>>>>+    i915_vma_unpin_and_release(&scratch, 0);
>>>>>+    return err;
>>>>>+}
>>>>>+
>>>>>+/*
>>>>>+ * For execlist mode of submission, pick an unused context id
>>>>>+ * 0 - (NUM_CONTEXT_TAG -1) are used by other contexts
>>>>>+ * XXX_MAX_CONTEXT_HW_ID is used by idle context
>>>>>+ *
>>>>>+ * For GuC mode of submission read context id from the upper 
>>>>>dword of the
>>>>>+ * EXECLIST_STATUS register.
>>>>>+ */
>>>>>+static int gen12_get_render_context_id(struct 
>>>>>i915_perf_stream *stream)
>>>>>+{
>>>>>+    u32 ctx_id, mask;
>>>>>+    int ret;
>>>>>+
>>>>>+    if (intel_engine_uses_guc(stream->engine)) {
>>>>>+        ret = gen12_guc_sw_ctx_id(stream->pinned_ctx, &ctx_id);
>>>>>+        if (ret)
>>>>>+            return ret;
>>>>>+
>>>>>+        mask = ((1U << GEN12_GUC_SW_CTX_ID_WIDTH) - 1) <<
>>>>>+            (GEN12_GUC_SW_CTX_ID_SHIFT - 32);
>>>>>+    } else if (GRAPHICS_VER_FULL(stream->engine->i915) >= 
>>>>>IP_VER(12, 50)) {
>>>>>+        ctx_id = (XEHP_MAX_CONTEXT_HW_ID - 1) <<
>>>>>+            (XEHP_SW_CTX_ID_SHIFT - 32);
>>>>>+
>>>>>+        mask = ((1U << XEHP_SW_CTX_ID_WIDTH) - 1) <<
>>>>>+            (XEHP_SW_CTX_ID_SHIFT - 32);
>>>>>+    } else {
>>>>>+        ctx_id = (GEN12_MAX_CONTEXT_HW_ID - 1) <<
>>>>>+             (GEN11_SW_CTX_ID_SHIFT - 32);
>>>>>+
>>>>>+        mask = ((1U << GEN11_SW_CTX_ID_WIDTH) - 1) <<
>>>>>+            (GEN11_SW_CTX_ID_SHIFT - 32);
>>>>>+    }
>>>>>+    stream->specific_ctx_id = ctx_id & mask;
>>>>>+    stream->specific_ctx_id_mask = mask;
>>>>>+
>>>>>+    return 0;
>>>>>+}
>>>>>+
>>>>> /**
>>>>>  * oa_get_render_ctx_id - determine and hold ctx hw id
>>>>>  * @stream: An i915-perf stream opened for OA metrics
>>>>>@@ -1246,6 +1365,7 @@ static struct intel_context 
>>>>>*oa_pin_context(struct i915_perf_stream *stream)
>>>>> static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
>>>>> {
>>>>>     struct intel_context *ce;
>>>>>+    int ret = 0;
>>>>>     ce = oa_pin_context(stream);
>>>>>     if (IS_ERR(ce))
>>>>>@@ -1292,24 +1412,7 @@ static int oa_get_render_ctx_id(struct 
>>>>>i915_perf_stream *stream)
>>>>>     case 11:
>>>>>     case 12:
>>>>>-        if (GRAPHICS_VER_FULL(ce->engine->i915) >= IP_VER(12, 50)) {
>>>>>-            stream->specific_ctx_id_mask =
>>>>>-                ((1U << XEHP_SW_CTX_ID_WIDTH) - 1) <<
>>>>>-                (XEHP_SW_CTX_ID_SHIFT - 32);
>>>>>-            stream->specific_ctx_id =
>>>>>-                (XEHP_MAX_CONTEXT_HW_ID - 1) <<
>>>>>-                (XEHP_SW_CTX_ID_SHIFT - 32);
>>>>>-        } else {
>>>>>-            stream->specific_ctx_id_mask =
>>>>>-                ((1U << GEN11_SW_CTX_ID_WIDTH) - 1) << 
>>>>>(GEN11_SW_CTX_ID_SHIFT - 32);
>>>>>-            /*
>>>>>-             * Pick an unused context id
>>>>>-             * 0 - BITS_PER_LONG are used by other contexts
>>>>>-             * GEN12_MAX_CONTEXT_HW_ID (0x7ff) is used by idle context
>>>>>-             */
>>>>>-            stream->specific_ctx_id =
>>>>>-                (GEN12_MAX_CONTEXT_HW_ID - 1) << 
>>>>>(GEN11_SW_CTX_ID_SHIFT - 32);
>>>>>-        }
>>>>>+        ret = gen12_get_render_context_id(stream);
>>>>>         break;
>>>>>     default:
>>>>>@@ -1323,7 +1426,7 @@ static int oa_get_render_ctx_id(struct 
>>>>>i915_perf_stream *stream)
>>>>>         stream->specific_ctx_id,
>>>>>         stream->specific_ctx_id_mask);
>>>>>-    return 0;
>>>>>+    return ret;
>>>>> }
>>>>> /**
>>>>
>>>>
>>
Umesh Nerlige Ramappa Sept. 14, 2022, 11:36 p.m. UTC | #8
On Fri, Sep 09, 2022 at 04:47:36PM -0700, Dixit, Ashutosh wrote:
>On Tue, 23 Aug 2022 13:41:37 -0700, Umesh Nerlige Ramappa wrote:
>>
>
>Hi Umesh,
>
>> With GuC mode of submission, GuC is in control of defining the context id field
>> that is part of the OA reports. To filter reports, UMD and KMD must know what sw
>> context id was chosen by GuC. There is not interface between KMD and GuC to
>> determine this, so read the upper-dword of EXECLIST_STATUS to filter/squash OA
>> reports for the specific context.
>
>Do you think it is worth defining an interface for GuC to return the sw
>ctx_id it will be using for a ctx, say at ctx registration time?
>
>The scheme implemented in this patch to read the ctx_id is certainly very
>clever, at least to me. But as Lionel was saying is it a agreed upon
>immutable interface? If it is, we can go with this patch.
>
>(Though even then we will need to maintain this code even if in the future
>GuC FW is changed to return the ctx_id in order to preserve backwards
>comptability with previous GuC versions. So maybe better to have a real
>interface between GuC and KMD earlier rather than later?).

Agreed, ideally this should be obtained from GuC and properly
synchronized with KMD, or GuC should provide a way to pin the context id
for such cases so that the id is not stolen/unpinned. Anyway, we need to
follow this up as a JIRA.

I may drop this patch and add a message that OA buffer filtering may be
broken if a gem context is passed.

>
>Also a couple of general comments below.
>
>>
>> Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
>> ---
>>  drivers/gpu/drm/i915/gt/intel_lrc.h |   2 +
>>  drivers/gpu/drm/i915/i915_perf.c    | 141 ++++++++++++++++++++++++----
>>  2 files changed, 124 insertions(+), 19 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h
>> index a390f0813c8b..7111bae759f3 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_lrc.h
>> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.h
>> @@ -110,6 +110,8 @@ enum {
>>  #define XEHP_SW_CTX_ID_WIDTH			16
>>  #define XEHP_SW_COUNTER_SHIFT			58
>>  #define XEHP_SW_COUNTER_WIDTH			6
>> +#define GEN12_GUC_SW_CTX_ID_SHIFT		39
>> +#define GEN12_GUC_SW_CTX_ID_WIDTH		16
>>
>>  static inline void lrc_runtime_start(struct intel_context *ce)
>>  {
>> diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
>> index f3c23fe9ad9c..735244a3aedd 100644
>> --- a/drivers/gpu/drm/i915/i915_perf.c
>> +++ b/drivers/gpu/drm/i915/i915_perf.c
>> @@ -1233,6 +1233,125 @@ static struct intel_context *oa_pin_context(struct i915_perf_stream *stream)
>>	return stream->pinned_ctx;
>>  }
>>
>> +static int
>> +__store_reg_to_mem(struct i915_request *rq, i915_reg_t reg, u32 ggtt_offset)
>> +{
>> +	u32 *cs, cmd;
>> +
>> +	cmd = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
>> +	if (GRAPHICS_VER(rq->engine->i915) >= 8)
>> +		cmd++;
>> +
>> +	cs = intel_ring_begin(rq, 4);
>> +	if (IS_ERR(cs))
>> +		return PTR_ERR(cs);
>> +
>> +	*cs++ = cmd;
>> +	*cs++ = i915_mmio_reg_offset(reg);
>> +	*cs++ = ggtt_offset;
>> +	*cs++ = 0;
>> +
>> +	intel_ring_advance(rq, cs);
>> +
>> +	return 0;
>> +}
>> +
>> +static int
>> +__read_reg(struct intel_context *ce, i915_reg_t reg, u32 ggtt_offset)
>> +{
>> +	struct i915_request *rq;
>> +	int err;
>> +
>> +	rq = i915_request_create(ce);
>> +	if (IS_ERR(rq))
>> +		return PTR_ERR(rq);
>> +
>> +	i915_request_get(rq);
>> +
>> +	err = __store_reg_to_mem(rq, reg, ggtt_offset);
>> +
>> +	i915_request_add(rq);
>> +	if (!err && i915_request_wait(rq, 0, HZ / 2) < 0)
>> +		err = -ETIME;
>> +
>> +	i915_request_put(rq);
>> +
>> +	return err;
>> +}
>> +
>> +static int
>> +gen12_guc_sw_ctx_id(struct intel_context *ce, u32 *ctx_id)
>> +{
>> +	struct i915_vma *scratch;
>> +	u32 *val;
>> +	int err;
>> +
>> +	scratch = __vm_create_scratch_for_read_pinned(&ce->engine->gt->ggtt->vm, 4);
>> +	if (IS_ERR(scratch))
>> +		return PTR_ERR(scratch);
>> +
>> +	err = i915_vma_sync(scratch);
>> +	if (err)
>> +		goto err_scratch;
>> +
>> +	err = __read_reg(ce, RING_EXECLIST_STATUS_HI(ce->engine->mmio_base),
>> +			 i915_ggtt_offset(scratch));
>
>Actually the RING_EXECLIST_STATUS_HI is MMIO so can be read using say
>ENGINE_READ/intel_uncore_read. The only issue is how to read it when this
>ctx is scheduled which is cleverly solved by the scheme above. But I am not
>sure if there is any other simpler way to do it.
>
>> +	if (err)
>> +		goto err_scratch;
>> +
>> +	val = i915_gem_object_pin_map_unlocked(scratch->obj, I915_MAP_WB);
>> +	if (IS_ERR(val)) {
>> +		err = PTR_ERR(val);
>> +		goto err_scratch;
>> +	}
>> +
>> +	*ctx_id = *val;
>> +	i915_gem_object_unpin_map(scratch->obj);
>> +
>> +err_scratch:
>> +	i915_vma_unpin_and_release(&scratch, 0);
>> +	return err;
>> +}
>> +
>> +/*
>> + * For execlist mode of submission, pick an unused context id
>> + * 0 - (NUM_CONTEXT_TAG -1) are used by other contexts
>> + * XXX_MAX_CONTEXT_HW_ID is used by idle context
>> + *
>> + * For GuC mode of submission read context id from the upper dword of the
>> + * EXECLIST_STATUS register.
>> + */
>> +static int gen12_get_render_context_id(struct i915_perf_stream *stream)
>> +{
>> +	u32 ctx_id, mask;
>> +	int ret;
>> +
>> +	if (intel_engine_uses_guc(stream->engine)) {
>> +		ret = gen12_guc_sw_ctx_id(stream->pinned_ctx, &ctx_id);
>> +		if (ret)
>> +			return ret;
>> +
>> +		mask = ((1U << GEN12_GUC_SW_CTX_ID_WIDTH) - 1) <<
>> +			(GEN12_GUC_SW_CTX_ID_SHIFT - 32);
>> +	} else if (GRAPHICS_VER_FULL(stream->engine->i915) >= IP_VER(12, 50)) {
>> +		ctx_id = (XEHP_MAX_CONTEXT_HW_ID - 1) <<
>> +			(XEHP_SW_CTX_ID_SHIFT - 32);
>> +
>> +		mask = ((1U << XEHP_SW_CTX_ID_WIDTH) - 1) <<
>> +			(XEHP_SW_CTX_ID_SHIFT - 32);
>> +	} else {
>> +		ctx_id = (GEN12_MAX_CONTEXT_HW_ID - 1) <<
>> +			 (GEN11_SW_CTX_ID_SHIFT - 32);
>> +
>> +		mask = ((1U << GEN11_SW_CTX_ID_WIDTH) - 1) <<
>> +			(GEN11_SW_CTX_ID_SHIFT - 32);
>
>Previously I missed that these ctx_id's for non-GuC cases are just
>constants. How does it work in these cases?

In those cases we use a fixed id for the OA use case:

in gen12_get_render_context_id()
stream->specific_ctx_id = ctx_id & mask

in oa_get_render_ctx_id()
ce->tag = stream->specific_ctx_id;

in __execlists_schedule_in()
ce->lrc.ccid = ce->tag;

Thanks,
Umesh

>
>Thanks.
>--
>Ashutosh
Umesh Nerlige Ramappa Sept. 14, 2022, 11:37 p.m. UTC | #9
On Mon, Sep 12, 2022 at 08:08:33PM -0700, Dixit, Ashutosh wrote:
>On Fri, 09 Sep 2022 16:47:36 -0700, Dixit, Ashutosh wrote:
>>
>> On Tue, 23 Aug 2022 13:41:37 -0700, Umesh Nerlige Ramappa wrote:
>> >
>>
>> Hi Umesh,
>>
>> > With GuC mode of submission, GuC is in control of defining the context id field
>> > that is part of the OA reports. To filter reports, UMD and KMD must know what sw
>> > context id was chosen by GuC. There is not interface between KMD and GuC to
>> > determine this, so read the upper-dword of EXECLIST_STATUS to filter/squash OA
>> > reports for the specific context.
>>
>> Do you think it is worth defining an interface for GuC to return the sw
>> ctx_id it will be using for a ctx, say at ctx registration time?
>
>Umesh, I came across these in GuC documentation:
>
>guc_pcv1_context_parameters_set_h2g_data_t::context_id
>guc_pcv2_context_parameters_set_h2g_data_t::context_id
>
>Also in the code we have in prepare_context_registration_info_v70 'ctx_id =
>ce->guc_id.id' which seems to be assigned in new_guc_id. So wondering if
>this is what we need and we already have it?

This id is different from what GuC programs into the LRCA.

Thanks,
Umesh
>
>Thanks.
>--
>Ashutosh
Umesh Nerlige Ramappa Sept. 15, 2022, 10:49 p.m. UTC | #10
On Wed, Sep 14, 2022 at 04:13:41PM -0700, Umesh Nerlige Ramappa wrote:
>On Wed, Sep 14, 2022 at 03:26:15PM -0700, Umesh Nerlige Ramappa wrote:
>>On Tue, Sep 06, 2022 at 09:39:33PM +0300, Lionel Landwerlin wrote:
>>>On 06/09/2022 20:39, Umesh Nerlige Ramappa wrote:
>>>>On Tue, Sep 06, 2022 at 05:33:00PM +0300, Lionel Landwerlin wrote:
>>>>>On 23/08/2022 23:41, Umesh Nerlige Ramappa wrote:
>>>>>>With GuC mode of submission, GuC is in control of defining 
>>>>>>the context id field
>>>>>>that is part of the OA reports. To filter reports, UMD and 
>>>>>>KMD must know what sw
>>>>>>context id was chosen by GuC. There is not interface between 
>>>>>>KMD and GuC to
>>>>>>determine this, so read the upper-dword of EXECLIST_STATUS 
>>>>>>to filter/squash OA
>>>>>>reports for the specific context.
>>>>>>
>>>>>>Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
>>>>>
>>>>>
>>>>>I assume you checked with GuC that this doesn't change as the 
>>>>>context is running?
>>>>
>>>>Correct.
>>>>
>>>>>
>>>>>With i915/execlist submission mode, we had to ask i915 to pin 
>>>>>the sw_id/ctx_id.
>>>>>
>>>>
>>>>From GuC perspective, the context id can change once KMD 
>>>>de-registers the context and that will not happen while the 
>>>>context is in use.
>>>>
>>>>Thanks,
>>>>Umesh
>>>
>>>
>>>Thanks Umesh,
>>>
>>>
>>>Maybe I should have been more precise in my question :
>>>
>>>
>>>Can the ID change while the i915-perf stream is opened?
>>>
>>>Because the ID not changing while the context is running makes sense.
>>>
>>>But since the number of available IDs is limited to 2k or 
>>>something on Gfx12, it's possible the GuC has to reuse IDs if too 
>>>many apps want to run during the period of time while i915-perf is 
>>>active and filtering.
>>>
>>
>>available guc ids are 64k with 4k reserved for multi-lrc, so GuC may 
>>have to reuse ids once 60k ids are used up.
>
>Spoke to the GuC team again and if there are a lot of contexts (> 60K) 
>running, there is a possibility of the context id being recycled. In 
>that case, the capture would be broken. I would track this as a 
>separate JIRA and follow up on a solution.
>
>From OA use case perspective, are we interested in monitoring just one 
>hardware context? If we make sure this context is not stolen, are we 
>good?
>

+ John

Based on John's inputs: if a context is pinned, then KMD does not steal
its id. It would just look for something else or wait for a context to
become available (pin count 0, I believe).

Since we pin the context for the duration of the OA use case, we should 
be good here.
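
A condensed sketch of that guarantee in terms of the existing
intel_context pin API (not the literal stream-open code; error paths
trimmed):

	/* at stream open: hold a pin so GuC keeps the context registered */
	err = intel_context_pin(ce);
	if (err)
		return err;
	stream->pinned_ctx = ce;

	/* ... for the stream's lifetime the id is not recycled ... */

	/* at stream close: drop the pin; the id may now be reused */
	intel_context_unpin(stream->pinned_ctx);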

Thanks,
Umesh

>Thanks,
>Umesh
>
>>
>>Thanks,
>>Umesh
>>
>>>
>>>-Lionel
>>>
>>>
>>>>
>>>>>
>>>>>If that's not the case then filtering is broken.
>>>>>
>>>>>
>>>>>-Lionel
>>>>>
>>>>>
Dixit, Ashutosh Sept. 20, 2022, 3:22 a.m. UTC | #11
On Thu, 15 Sep 2022 15:49:27 -0700, Umesh Nerlige Ramappa wrote:
>
> On Wed, Sep 14, 2022 at 04:13:41PM -0700, Umesh Nerlige Ramappa wrote:
> > On Wed, Sep 14, 2022 at 03:26:15PM -0700, Umesh Nerlige Ramappa wrote:
> >> On Tue, Sep 06, 2022 at 09:39:33PM +0300, Lionel Landwerlin wrote:
> >>> On 06/09/2022 20:39, Umesh Nerlige Ramappa wrote:
> >>>> On Tue, Sep 06, 2022 at 05:33:00PM +0300, Lionel Landwerlin wrote:
> >>>>> On 23/08/2022 23:41, Umesh Nerlige Ramappa wrote:
> >>>>>> With GuC mode of submission, GuC is in control of defining the
> >>>>>> context id field
> >>>>>> that is part of the OA reports. To filter reports, UMD and KMD must
> >>>>>> know what sw
> >>>>>> context id was chosen by GuC. There is not interface between KMD and
> >>>>>> GuC to
> >>>>>> determine this, so read the upper-dword of EXECLIST_STATUS to
> >>>>>> filter/squash OA
> >>>>>> reports for the specific context.
> >>>>>>
> >>>>>> Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
> >>>>>
> >>>>>
> >>>>> I assume you checked with GuC that this doesn't change as the context
> >>>>> is running?
> >>>>
> >>>> Correct.
> >>>>
> >>>>>
> >>>>> With i915/execlist submission mode, we had to ask i915 to pin the
> >>>>> sw_id/ctx_id.
> >>>>>
> >>>>
> >>>> From GuC perspective, the context id can change once KMD de-registers
> >>>> the context and that will not happen while the context is in use.
> >>>>
> >>>> Thanks,
> >>>> Umesh
> >>>
> >>>
> >>> Thanks Umesh,
> >>>
> >>>
> >>> Maybe I should have been more precise in my question :
> >>>
> >>>
> >>> Can the ID change while the i915-perf stream is opened?
> >>>
> >>> Because the ID not changing while the context is running makes sense.
> >>>
> >>> But since the number of available IDs is limited to 2k or something on
> >>> Gfx12, it's possible the GuC has to reuse IDs if too many apps want to
> >>> run during the period of time while i915-perf is active and filtering.
> >>>
> >>
> >> available guc ids are 64k with 4k reserved for multi-lrc, so GuC may
> >> have to reuse ids once 60k ids are used up.
> >
> > Spoke to the GuC team again and if there are a lot of contexts (> 60K)
> > running, there is a possibility of the context id being recycled. In that
> > case, the capture would be broken. I would track this as a separate JIRA
> > and follow up on a solution.
> >
> > From OA use case perspective, are we interested in monitoring just one
> > hardware context? If we make sure this context is not stolen, are we
> > good?
> >
>
> + John
>
> > Based on John's inputs - if a context is pinned, then KMD does not steal
> > its id. It would just look for something else or wait for a context to
> > become available (pin count 0, I believe).
>
> Since we pin the context for the duration of the OA use case, we should be
> good here.

Since this appears to be true, I am thinking of okay'ing this patch rather
than defining a new interface with GuC for this. Let me know if there are
any objections to this.

Thanks.
--
Ashutosh

> >>> -Lionel
> >>>
> >>>
> >>>>
> >>>>>
> >>>>> If that's not the case then filtering is broken.
> >>>>>
> >>>>>
> >>>>> -Lionel
> >>>>>
> >>>>>
Dixit, Ashutosh Sept. 22, 2022, 3:44 a.m. UTC | #12
On Fri, 09 Sep 2022 16:47:36 -0700, Dixit, Ashutosh wrote:
>
> On Tue, 23 Aug 2022 13:41:37 -0700, Umesh Nerlige Ramappa wrote:
> >
> > +/*
> > + * For execlist mode of submission, pick an unused context id
> > + * 0 - (NUM_CONTEXT_TAG -1) are used by other contexts
> > + * XXX_MAX_CONTEXT_HW_ID is used by idle context
> > + *
> > + * For GuC mode of submission read context id from the upper dword of the
> > + * EXECLIST_STATUS register.
> > + */
> > +static int gen12_get_render_context_id(struct i915_perf_stream *stream)
> > +{
> > +	u32 ctx_id, mask;
> > +	int ret;
> > +
> > +	if (intel_engine_uses_guc(stream->engine)) {
> > +		ret = gen12_guc_sw_ctx_id(stream->pinned_ctx, &ctx_id);
> > +		if (ret)
> > +			return ret;
> > +
> > +		mask = ((1U << GEN12_GUC_SW_CTX_ID_WIDTH) - 1) <<
> > +			(GEN12_GUC_SW_CTX_ID_SHIFT - 32);
> > +	} else if (GRAPHICS_VER_FULL(stream->engine->i915) >= IP_VER(12, 50)) {
> > +		ctx_id = (XEHP_MAX_CONTEXT_HW_ID - 1) <<
> > +			(XEHP_SW_CTX_ID_SHIFT - 32);
> > +
> > +		mask = ((1U << XEHP_SW_CTX_ID_WIDTH) - 1) <<
> > +			(XEHP_SW_CTX_ID_SHIFT - 32);
> > +	} else {
> > +		ctx_id = (GEN12_MAX_CONTEXT_HW_ID - 1) <<
> > +			 (GEN11_SW_CTX_ID_SHIFT - 32);
> > +
> > +		mask = ((1U << GEN11_SW_CTX_ID_WIDTH) - 1) <<
> > +			(GEN11_SW_CTX_ID_SHIFT - 32);
>
> Previously I missed that these ctx_id's for non-GuC cases are just
> constants. How does it work in these cases?

For the record, offline reply from Umesh for this question:

For execlist mode, the SW context id is set to a unique value by the KMD
in __execlists_schedule_in() as the ccid. Later it is written to the
execlist port (as lrc.desc) in execlists_submit_ports(). It's just a
unique value that the KMD determines. For OA we set ce->tag while the OA
use case is active, and it is used by __execlists_schedule_in().

Related commit from Chris - 2935ed5339c49

I think the reason OA sets it is that this value is not assigned until
__execlists_schedule_in() is called, which for the OA context may happen
much later. The code Chris added just assigns a value in OA and then uses
it later in the __execlists_schedule_in() path.
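
To make the mechanism above concrete, an illustrative sketch (simplified,
not the exact __execlists_schedule_in() code; next_engine_context_tag() is
a hypothetical stand-in for the rotating per-engine tag allocation):

/* OA pre-assigns ce->tag so the ccid is known before the context first runs */
static u32 pick_ccid(struct intel_engine_cs *engine, struct intel_context *ce)
{
	if (ce->tag)
		return ce->tag;			/* fixed tag reserved by OA */

	return next_engine_context_tag(engine);	/* per-run rotating tag */
}
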
Dixit, Ashutosh Sept. 22, 2022, 3:49 a.m. UTC | #13
On Wed, 21 Sep 2022 20:44:57 -0700, Dixit, Ashutosh wrote:
>
> On Fri, 09 Sep 2022 16:47:36 -0700, Dixit, Ashutosh wrote:
> >
> > On Tue, 23 Aug 2022 13:41:37 -0700, Umesh Nerlige Ramappa wrote:
> > >
> > > +/*
> > > + * For execlist mode of submission, pick an unused context id
> > > + * 0 - (NUM_CONTEXT_TAG -1) are used by other contexts
> > > + * XXX_MAX_CONTEXT_HW_ID is used by idle context
> > > + *
> > > + * For GuC mode of submission read context id from the upper dword of the
> > > + * EXECLIST_STATUS register.
> > > + */
> > > +static int gen12_get_render_context_id(struct i915_perf_stream *stream)
> > > +{
> > > +	u32 ctx_id, mask;
> > > +	int ret;
> > > +
> > > +	if (intel_engine_uses_guc(stream->engine)) {
> > > +		ret = gen12_guc_sw_ctx_id(stream->pinned_ctx, &ctx_id);
> > > +		if (ret)
> > > +			return ret;
> > > +
> > > +		mask = ((1U << GEN12_GUC_SW_CTX_ID_WIDTH) - 1) <<
> > > +			(GEN12_GUC_SW_CTX_ID_SHIFT - 32);
> > > +	} else if (GRAPHICS_VER_FULL(stream->engine->i915) >= IP_VER(12, 50)) {
> > > +		ctx_id = (XEHP_MAX_CONTEXT_HW_ID - 1) <<
> > > +			(XEHP_SW_CTX_ID_SHIFT - 32);
> > > +
> > > +		mask = ((1U << XEHP_SW_CTX_ID_WIDTH) - 1) <<
> > > +			(XEHP_SW_CTX_ID_SHIFT - 32);
> > > +	} else {
> > > +		ctx_id = (GEN12_MAX_CONTEXT_HW_ID - 1) <<
> > > +			 (GEN11_SW_CTX_ID_SHIFT - 32);
> > > +
> > > +		mask = ((1U << GEN11_SW_CTX_ID_WIDTH) - 1) <<
> > > +			(GEN11_SW_CTX_ID_SHIFT - 32);
> >
> > Previously I missed that these ctx_id's for non-GuC cases are just
> > constants. How does it work in these cases?
>
> For the record, offline reply from Umesh for this question:
>
> For execlist mode, the SW context id is set to a unique value by the KMD
> in __execlists_schedule_in() as the ccid. Later it is written to the
> execlist port (as lrc.desc) in execlists_submit_ports(). It's just a
> unique value that the KMD determines. For OA we set ce->tag while the OA
> use case is active, and it is used by __execlists_schedule_in().
>
> Related commit from Chris - 2935ed5339c49
>
> I think the reason OA sets it is that this value is not assigned until
> __execlists_schedule_in() is called, which for the OA context may happen
> much later. The code Chris added just assigns a value in OA and then uses
> it later in the __execlists_schedule_in() path.

I would still think this should not be a constant value but something that
depends on the context or the context id. Anyway, since this is a
pre-existing issue not introduced in this patch, I will disregard it and
continue reviewing this patch.
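
For reference, a worked example of the mask/id arithmetic in
gen12_get_render_context_id(), assuming GEN11_SW_CTX_ID_SHIFT = 37,
GEN11_SW_CTX_ID_WIDTH = 11 and GEN12_MAX_CONTEXT_HW_ID = 0x7ff (the value
quoted in the comment this patch removes):

/*
 * GuC:            mask   = ((1U << 16) - 1) << (39 - 32) = 0xffff << 7 = 0x007fff80
 * Gen12 execlist: mask   = ((1U << 11) - 1) << (37 - 32) = 0x7ff  << 5 = 0x0000ffe0
 *                 ctx_id = (0x7ff - 1)      << (37 - 32) = 0x7fe  << 5 = 0x0000ffc0
 *
 * The GuC mask selects bits 39..54 of the lrc descriptor, i.e. bits 7..22
 * of the EXECLIST_STATUS upper dword read by gen12_guc_sw_ctx_id().
 */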

Thanks.
--
Ashutosh
Dixit, Ashutosh Sept. 22, 2022, 3:51 a.m. UTC | #14
On Mon, 19 Sep 2022 20:22:40 -0700, Dixit, Ashutosh wrote:
>
> On Thu, 15 Sep 2022 15:49:27 -0700, Umesh Nerlige Ramappa wrote:
> >
> > On Wed, Sep 14, 2022 at 04:13:41PM -0700, Umesh Nerlige Ramappa wrote:
> > > On Wed, Sep 14, 2022 at 03:26:15PM -0700, Umesh Nerlige Ramappa wrote:
> > >> On Tue, Sep 06, 2022 at 09:39:33PM +0300, Lionel Landwerlin wrote:
> > >>> On 06/09/2022 20:39, Umesh Nerlige Ramappa wrote:
> > >>>> On Tue, Sep 06, 2022 at 05:33:00PM +0300, Lionel Landwerlin wrote:
> > >>>>> On 23/08/2022 23:41, Umesh Nerlige Ramappa wrote:
> > >>>>>> With GuC mode of submission, GuC is in control of defining the
> > >>>>>> context id field
> > >>>>>> that is part of the OA reports. To filter reports, UMD and KMD must
> > >>>>>> know what sw
> > >>>>>> context id was chosen by GuC. There is not interface between KMD and
> > >>>>>> GuC to
> > >>>>>> determine this, so read the upper-dword of EXECLIST_STATUS to
> > >>>>>> filter/squash OA
> > >>>>>> reports for the specific context.
> > >>>>>>
> > >>>>>> Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
> > >>>>>
> > >>>>>
> > >>>>> I assume you checked with GuC that this doesn't change as the context
> > >>>>> is running?
> > >>>>
> > >>>> Correct.
> > >>>>
> > >>>>>
> > >>>>> With i915/execlist submission mode, we had to ask i915 to pin the
> > >>>>> sw_id/ctx_id.
> > >>>>>
> > >>>>
> > >>>> From GuC perspective, the context id can change once KMD de-registers
> > >>>> the context and that will not happen while the context is in use.
> > >>>>
> > >>>> Thanks,
> > >>>> Umesh
> > >>>
> > >>>
> > >>> Thanks Umesh,
> > >>>
> > >>>
> > >>> Maybe I should have been more precise in my question :
> > >>>
> > >>>
> > >>> Can the ID change while the i915-perf stream is opened?
> > >>>
> > >>> Because the ID not changing while the context is running makes sense.
> > >>>
> > >>> But since the number of available IDs is limited to 2k or something on
> > >>> Gfx12, it's possible the GuC has to reuse IDs if too many apps want to
> > >>> run during the period of time while i915-perf is active and filtering.
> > >>>
> > >>
> > >> available guc ids are 64k with 4k reserved for multi-lrc, so GuC may
> > >> have to reuse ids once 60k ids are used up.
> > >
> > > Spoke to the GuC team again and if there are a lot of contexts (> 60K)
> > > running, there is a possibility of the context id being recycled. In that
> > > case, the capture would be broken. I would track this as a separate JIRA
> > > and follow up on a solution.
> > >
> > > From OA use case perspective, are we interested in monitoring just one
> > > hardware context? If we make sure this context is not stolen, are we
> > > good?
> > >
> >
> > + John
> >
> > Based on John's inputs - if a context is pinned, then KMD does not steal
> > its id. It would just look for something else or wait for a context to
> > become available (pin count 0, I believe).
> >
> > Since we pin the context for the duration of the OA use case, we should be
> > good here.
>
> Since this appears to be true, I am thinking of okay'ing this patch rather
> than defining a new interface with GuC for this. Let me know if there are
> any objections to this.

With the above comments/assumptions this is:

Reviewed-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
Lionel Landwerlin Sept. 22, 2022, 11:05 a.m. UTC | #15
On 15/09/2022 02:13, Umesh Nerlige Ramappa wrote:
> On Wed, Sep 14, 2022 at 03:26:15PM -0700, Umesh Nerlige Ramappa wrote:
>> On Tue, Sep 06, 2022 at 09:39:33PM +0300, Lionel Landwerlin wrote:
>>> On 06/09/2022 20:39, Umesh Nerlige Ramappa wrote:
>>>> On Tue, Sep 06, 2022 at 05:33:00PM +0300, Lionel Landwerlin wrote:
>>>>> On 23/08/2022 23:41, Umesh Nerlige Ramappa wrote:
>>>>>> With GuC mode of submission, GuC is in control of defining the 
>>>>>> context id field
>>>>>> that is part of the OA reports. To filter reports, UMD and KMD 
>>>>>> must know what sw
>>>>>> context id was chosen by GuC. There is not interface between KMD 
>>>>>> and GuC to
>>>>>> determine this, so read the upper-dword of EXECLIST_STATUS to 
>>>>>> filter/squash OA
>>>>>> reports for the specific context.
>>>>>>
>>>>>> Signed-off-by: Umesh Nerlige Ramappa 
>>>>>> <umesh.nerlige.ramappa@intel.com>
>>>>>
>>>>>
>>>>> I assume you checked with GuC that this doesn't change as the 
>>>>> context is running?
>>>>
>>>> Correct.
>>>>
>>>>>
>>>>> With i915/execlist submission mode, we had to ask i915 to pin the 
>>>>> sw_id/ctx_id.
>>>>>
>>>>
>>>> From GuC perspective, the context id can change once KMD 
>>>> de-registers the context and that will not happen while the context 
>>>> is in use.
>>>>
>>>> Thanks,
>>>> Umesh
>>>
>>>
>>> Thanks Umesh,
>>>
>>>
>>> Maybe I should have been more precise in my question :
>>>
>>>
>>> Can the ID change while the i915-perf stream is opened?
>>>
>>> Because the ID not changing while the context is running makes sense.
>>>
>>> But since the number of available IDs is limited to 2k or something 
>>> on Gfx12, it's possible the GuC has to reuse IDs if too many apps 
>>> want to run during the period of time while i915-perf is active and 
>>> filtering.
>>>
>>
>> available guc ids are 64k with 4k reserved for multi-lrc, so GuC may 
>> have to reuse ids once 60k ids are used up.
>
> Spoke to the GuC team again and if there are a lot of contexts (> 60K) 
> running, there is a possibility of the context id being recycled. In 
> that case, the capture would be broken. I would track this as a 
> separate JIRA and follow up on a solution.
>
> From OA use case perspective, are we interested in monitoring just one 
> hardware context? If we make sure this context is not stolen, are we 
> good?
> Thanks,
> Umesh


Yep, we only care about that one ID not changing.


Thanks,

-Lionel


>
>>
>> Thanks,
>> Umesh
>>
>>>
>>> -Lionel
>>>
>>>
>>>>
>>>>>
>>>>> If that's not the case then filtering is broken.
>>>>>
>>>>>
>>>>> -Lionel
>>>>>
>>>>>
>>>>>> ---
>>>>>>  drivers/gpu/drm/i915/gt/intel_lrc.h |   2 +
>>>>>>  drivers/gpu/drm/i915/i915_perf.c    | 141 
>>>>>> ++++++++++++++++++++++++----
>>>>>>  2 files changed, 124 insertions(+), 19 deletions(-)
>>>>>>
>>>>>> diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h 
>>>>>> b/drivers/gpu/drm/i915/gt/intel_lrc.h
>>>>>> index a390f0813c8b..7111bae759f3 100644
>>>>>> --- a/drivers/gpu/drm/i915/gt/intel_lrc.h
>>>>>> +++ b/drivers/gpu/drm/i915/gt/intel_lrc.h
>>>>>> @@ -110,6 +110,8 @@ enum {
>>>>>>  #define XEHP_SW_CTX_ID_WIDTH            16
>>>>>>  #define XEHP_SW_COUNTER_SHIFT            58
>>>>>>  #define XEHP_SW_COUNTER_WIDTH            6
>>>>>> +#define GEN12_GUC_SW_CTX_ID_SHIFT        39
>>>>>> +#define GEN12_GUC_SW_CTX_ID_WIDTH        16
>>>>>>  static inline void lrc_runtime_start(struct intel_context *ce)
>>>>>>  {
>>>>>> diff --git a/drivers/gpu/drm/i915/i915_perf.c 
>>>>>> b/drivers/gpu/drm/i915/i915_perf.c
>>>>>> index f3c23fe9ad9c..735244a3aedd 100644
>>>>>> --- a/drivers/gpu/drm/i915/i915_perf.c
>>>>>> +++ b/drivers/gpu/drm/i915/i915_perf.c
>>>>>> @@ -1233,6 +1233,125 @@ static struct intel_context 
>>>>>> *oa_pin_context(struct i915_perf_stream *stream)
>>>>>>      return stream->pinned_ctx;
>>>>>>  }
>>>>>> +static int
>>>>>> +__store_reg_to_mem(struct i915_request *rq, i915_reg_t reg, u32 
>>>>>> ggtt_offset)
>>>>>> +{
>>>>>> +    u32 *cs, cmd;
>>>>>> +
>>>>>> +    cmd = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
>>>>>> +    if (GRAPHICS_VER(rq->engine->i915) >= 8)
>>>>>> +        cmd++;
>>>>>> +
>>>>>> +    cs = intel_ring_begin(rq, 4);
>>>>>> +    if (IS_ERR(cs))
>>>>>> +        return PTR_ERR(cs);
>>>>>> +
>>>>>> +    *cs++ = cmd;
>>>>>> +    *cs++ = i915_mmio_reg_offset(reg);
>>>>>> +    *cs++ = ggtt_offset;
>>>>>> +    *cs++ = 0;
>>>>>> +
>>>>>> +    intel_ring_advance(rq, cs);
>>>>>> +
>>>>>> +    return 0;
>>>>>> +}
>>>>>> +
>>>>>> +static int
>>>>>> +__read_reg(struct intel_context *ce, i915_reg_t reg, u32 
>>>>>> ggtt_offset)
>>>>>> +{
>>>>>> +    struct i915_request *rq;
>>>>>> +    int err;
>>>>>> +
>>>>>> +    rq = i915_request_create(ce);
>>>>>> +    if (IS_ERR(rq))
>>>>>> +        return PTR_ERR(rq);
>>>>>> +
>>>>>> +    i915_request_get(rq);
>>>>>> +
>>>>>> +    err = __store_reg_to_mem(rq, reg, ggtt_offset);
>>>>>> +
>>>>>> +    i915_request_add(rq);
>>>>>> +    if (!err && i915_request_wait(rq, 0, HZ / 2) < 0)
>>>>>> +        err = -ETIME;
>>>>>> +
>>>>>> +    i915_request_put(rq);
>>>>>> +
>>>>>> +    return err;
>>>>>> +}
>>>>>> +
>>>>>> +static int
>>>>>> +gen12_guc_sw_ctx_id(struct intel_context *ce, u32 *ctx_id)
>>>>>> +{
>>>>>> +    struct i915_vma *scratch;
>>>>>> +    u32 *val;
>>>>>> +    int err;
>>>>>> +
>>>>>> +    scratch = 
>>>>>> __vm_create_scratch_for_read_pinned(&ce->engine->gt->ggtt->vm, 4);
>>>>>> +    if (IS_ERR(scratch))
>>>>>> +        return PTR_ERR(scratch);
>>>>>> +
>>>>>> +    err = i915_vma_sync(scratch);
>>>>>> +    if (err)
>>>>>> +        goto err_scratch;
>>>>>> +
>>>>>> +    err = __read_reg(ce, 
>>>>>> RING_EXECLIST_STATUS_HI(ce->engine->mmio_base),
>>>>>> +             i915_ggtt_offset(scratch));
>>>>>> +    if (err)
>>>>>> +        goto err_scratch;
>>>>>> +
>>>>>> +    val = i915_gem_object_pin_map_unlocked(scratch->obj, 
>>>>>> I915_MAP_WB);
>>>>>> +    if (IS_ERR(val)) {
>>>>>> +        err = PTR_ERR(val);
>>>>>> +        goto err_scratch;
>>>>>> +    }
>>>>>> +
>>>>>> +    *ctx_id = *val;
>>>>>> +    i915_gem_object_unpin_map(scratch->obj);
>>>>>> +
>>>>>> +err_scratch:
>>>>>> +    i915_vma_unpin_and_release(&scratch, 0);
>>>>>> +    return err;
>>>>>> +}
>>>>>> +
>>>>>> +/*
>>>>>> + * For execlist mode of submission, pick an unused context id
>>>>>> + * 0 - (NUM_CONTEXT_TAG -1) are used by other contexts
>>>>>> + * XXX_MAX_CONTEXT_HW_ID is used by idle context
>>>>>> + *
>>>>>> + * For GuC mode of submission read context id from the upper 
>>>>>> dword of the
>>>>>> + * EXECLIST_STATUS register.
>>>>>> + */
>>>>>> +static int gen12_get_render_context_id(struct i915_perf_stream 
>>>>>> *stream)
>>>>>> +{
>>>>>> +    u32 ctx_id, mask;
>>>>>> +    int ret;
>>>>>> +
>>>>>> +    if (intel_engine_uses_guc(stream->engine)) {
>>>>>> +        ret = gen12_guc_sw_ctx_id(stream->pinned_ctx, &ctx_id);
>>>>>> +        if (ret)
>>>>>> +            return ret;
>>>>>> +
>>>>>> +        mask = ((1U << GEN12_GUC_SW_CTX_ID_WIDTH) - 1) <<
>>>>>> +            (GEN12_GUC_SW_CTX_ID_SHIFT - 32);
>>>>>> +    } else if (GRAPHICS_VER_FULL(stream->engine->i915) >= 
>>>>>> IP_VER(12, 50)) {
>>>>>> +        ctx_id = (XEHP_MAX_CONTEXT_HW_ID - 1) <<
>>>>>> +            (XEHP_SW_CTX_ID_SHIFT - 32);
>>>>>> +
>>>>>> +        mask = ((1U << XEHP_SW_CTX_ID_WIDTH) - 1) <<
>>>>>> +            (XEHP_SW_CTX_ID_SHIFT - 32);
>>>>>> +    } else {
>>>>>> +        ctx_id = (GEN12_MAX_CONTEXT_HW_ID - 1) <<
>>>>>> +             (GEN11_SW_CTX_ID_SHIFT - 32);
>>>>>> +
>>>>>> +        mask = ((1U << GEN11_SW_CTX_ID_WIDTH) - 1) <<
>>>>>> +            (GEN11_SW_CTX_ID_SHIFT - 32);
>>>>>> +    }
>>>>>> +    stream->specific_ctx_id = ctx_id & mask;
>>>>>> +    stream->specific_ctx_id_mask = mask;
>>>>>> +
>>>>>> +    return 0;
>>>>>> +}
>>>>>> +
>>>>>>  /**
>>>>>>   * oa_get_render_ctx_id - determine and hold ctx hw id
>>>>>>   * @stream: An i915-perf stream opened for OA metrics
>>>>>> @@ -1246,6 +1365,7 @@ static struct intel_context 
>>>>>> *oa_pin_context(struct i915_perf_stream *stream)
>>>>>>  static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
>>>>>>  {
>>>>>>      struct intel_context *ce;
>>>>>> +    int ret = 0;
>>>>>>      ce = oa_pin_context(stream);
>>>>>>      if (IS_ERR(ce))
>>>>>> @@ -1292,24 +1412,7 @@ static int oa_get_render_ctx_id(struct 
>>>>>> i915_perf_stream *stream)
>>>>>>      case 11:
>>>>>>      case 12:
>>>>>> -        if (GRAPHICS_VER_FULL(ce->engine->i915) >= IP_VER(12, 
>>>>>> 50)) {
>>>>>> -            stream->specific_ctx_id_mask =
>>>>>> -                ((1U << XEHP_SW_CTX_ID_WIDTH) - 1) <<
>>>>>> -                (XEHP_SW_CTX_ID_SHIFT - 32);
>>>>>> -            stream->specific_ctx_id =
>>>>>> -                (XEHP_MAX_CONTEXT_HW_ID - 1) <<
>>>>>> -                (XEHP_SW_CTX_ID_SHIFT - 32);
>>>>>> -        } else {
>>>>>> -            stream->specific_ctx_id_mask =
>>>>>> -                ((1U << GEN11_SW_CTX_ID_WIDTH) - 1) << 
>>>>>> (GEN11_SW_CTX_ID_SHIFT - 32);
>>>>>> -            /*
>>>>>> -             * Pick an unused context id
>>>>>> -             * 0 - BITS_PER_LONG are used by other contexts
>>>>>> -             * GEN12_MAX_CONTEXT_HW_ID (0x7ff) is used by idle 
>>>>>> context
>>>>>> -             */
>>>>>> -            stream->specific_ctx_id =
>>>>>> -                (GEN12_MAX_CONTEXT_HW_ID - 1) << 
>>>>>> (GEN11_SW_CTX_ID_SHIFT - 32);
>>>>>> -        }
>>>>>> +        ret = gen12_get_render_context_id(stream);
>>>>>>          break;
>>>>>>      default:
>>>>>> @@ -1323,7 +1426,7 @@ static int oa_get_render_ctx_id(struct 
>>>>>> i915_perf_stream *stream)
>>>>>>          stream->specific_ctx_id,
>>>>>>          stream->specific_ctx_id_mask);
>>>>>> -    return 0;
>>>>>> +    return ret;
>>>>>>  }
>>>>>>  /**
>>>>>
>>>>>
>>>
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h
index a390f0813c8b..7111bae759f3 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.h
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.h
@@ -110,6 +110,8 @@  enum {
 #define XEHP_SW_CTX_ID_WIDTH			16
 #define XEHP_SW_COUNTER_SHIFT			58
 #define XEHP_SW_COUNTER_WIDTH			6
+#define GEN12_GUC_SW_CTX_ID_SHIFT		39
+#define GEN12_GUC_SW_CTX_ID_WIDTH		16
 
 static inline void lrc_runtime_start(struct intel_context *ce)
 {
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index f3c23fe9ad9c..735244a3aedd 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1233,6 +1233,125 @@  static struct intel_context *oa_pin_context(struct i915_perf_stream *stream)
 	return stream->pinned_ctx;
 }
 
+static int
+__store_reg_to_mem(struct i915_request *rq, i915_reg_t reg, u32 ggtt_offset)
+{
+	u32 *cs, cmd;
+
+	cmd = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
+	if (GRAPHICS_VER(rq->engine->i915) >= 8)
+		cmd++;
+
+	cs = intel_ring_begin(rq, 4);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	*cs++ = cmd;
+	*cs++ = i915_mmio_reg_offset(reg);
+	*cs++ = ggtt_offset;
+	*cs++ = 0;
+
+	intel_ring_advance(rq, cs);
+
+	return 0;
+}
+
+static int
+__read_reg(struct intel_context *ce, i915_reg_t reg, u32 ggtt_offset)
+{
+	struct i915_request *rq;
+	int err;
+
+	rq = i915_request_create(ce);
+	if (IS_ERR(rq))
+		return PTR_ERR(rq);
+
+	i915_request_get(rq);
+
+	err = __store_reg_to_mem(rq, reg, ggtt_offset);
+
+	i915_request_add(rq);
+	if (!err && i915_request_wait(rq, 0, HZ / 2) < 0)
+		err = -ETIME;
+
+	i915_request_put(rq);
+
+	return err;
+}
+
+static int
+gen12_guc_sw_ctx_id(struct intel_context *ce, u32 *ctx_id)
+{
+	struct i915_vma *scratch;
+	u32 *val;
+	int err;
+
+	scratch = __vm_create_scratch_for_read_pinned(&ce->engine->gt->ggtt->vm, 4);
+	if (IS_ERR(scratch))
+		return PTR_ERR(scratch);
+
+	err = i915_vma_sync(scratch);
+	if (err)
+		goto err_scratch;
+
+	err = __read_reg(ce, RING_EXECLIST_STATUS_HI(ce->engine->mmio_base),
+			 i915_ggtt_offset(scratch));
+	if (err)
+		goto err_scratch;
+
+	val = i915_gem_object_pin_map_unlocked(scratch->obj, I915_MAP_WB);
+	if (IS_ERR(val)) {
+		err = PTR_ERR(val);
+		goto err_scratch;
+	}
+
+	*ctx_id = *val;
+	i915_gem_object_unpin_map(scratch->obj);
+
+err_scratch:
+	i915_vma_unpin_and_release(&scratch, 0);
+	return err;
+}
+
+/*
+ * For execlist mode of submission, pick an unused context id
+ * 0 - (NUM_CONTEXT_TAG -1) are used by other contexts
+ * XXX_MAX_CONTEXT_HW_ID is used by idle context
+ *
+ * For GuC mode of submission read context id from the upper dword of the
+ * EXECLIST_STATUS register.
+ */
+static int gen12_get_render_context_id(struct i915_perf_stream *stream)
+{
+	u32 ctx_id, mask;
+	int ret;
+
+	if (intel_engine_uses_guc(stream->engine)) {
+		ret = gen12_guc_sw_ctx_id(stream->pinned_ctx, &ctx_id);
+		if (ret)
+			return ret;
+
+		mask = ((1U << GEN12_GUC_SW_CTX_ID_WIDTH) - 1) <<
+			(GEN12_GUC_SW_CTX_ID_SHIFT - 32);
+	} else if (GRAPHICS_VER_FULL(stream->engine->i915) >= IP_VER(12, 50)) {
+		ctx_id = (XEHP_MAX_CONTEXT_HW_ID - 1) <<
+			(XEHP_SW_CTX_ID_SHIFT - 32);
+
+		mask = ((1U << XEHP_SW_CTX_ID_WIDTH) - 1) <<
+			(XEHP_SW_CTX_ID_SHIFT - 32);
+	} else {
+		ctx_id = (GEN12_MAX_CONTEXT_HW_ID - 1) <<
+			 (GEN11_SW_CTX_ID_SHIFT - 32);
+
+		mask = ((1U << GEN11_SW_CTX_ID_WIDTH) - 1) <<
+			(GEN11_SW_CTX_ID_SHIFT - 32);
+	}
+	stream->specific_ctx_id = ctx_id & mask;
+	stream->specific_ctx_id_mask = mask;
+
+	return 0;
+}
+
 /**
  * oa_get_render_ctx_id - determine and hold ctx hw id
  * @stream: An i915-perf stream opened for OA metrics
@@ -1246,6 +1365,7 @@  static struct intel_context *oa_pin_context(struct i915_perf_stream *stream)
 static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
 {
 	struct intel_context *ce;
+	int ret = 0;
 
 	ce = oa_pin_context(stream);
 	if (IS_ERR(ce))
@@ -1292,24 +1412,7 @@  static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
 
 	case 11:
 	case 12:
-		if (GRAPHICS_VER_FULL(ce->engine->i915) >= IP_VER(12, 50)) {
-			stream->specific_ctx_id_mask =
-				((1U << XEHP_SW_CTX_ID_WIDTH) - 1) <<
-				(XEHP_SW_CTX_ID_SHIFT - 32);
-			stream->specific_ctx_id =
-				(XEHP_MAX_CONTEXT_HW_ID - 1) <<
-				(XEHP_SW_CTX_ID_SHIFT - 32);
-		} else {
-			stream->specific_ctx_id_mask =
-				((1U << GEN11_SW_CTX_ID_WIDTH) - 1) << (GEN11_SW_CTX_ID_SHIFT - 32);
-			/*
-			 * Pick an unused context id
-			 * 0 - BITS_PER_LONG are used by other contexts
-			 * GEN12_MAX_CONTEXT_HW_ID (0x7ff) is used by idle context
-			 */
-			stream->specific_ctx_id =
-				(GEN12_MAX_CONTEXT_HW_ID - 1) << (GEN11_SW_CTX_ID_SHIFT - 32);
-		}
+		ret = gen12_get_render_context_id(stream);
 		break;
 
 	default:
@@ -1323,7 +1426,7 @@  static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
 		stream->specific_ctx_id,
 		stream->specific_ctx_id_mask);
 
-	return 0;
+	return ret;
 }
 
 /**