diff mbox series

drm/i915/mtl: Add support for 32 bit OAG formats in MTL

Message ID 20221129012146.995006-1-umesh.nerlige.ramappa@intel.com (mailing list archive)
State New, archived
Headers show
Series drm/i915/mtl: Add support for 32 bit OAG formats in MTL | expand

Commit Message

Umesh Nerlige Ramappa Nov. 29, 2022, 1:21 a.m. UTC
As part of OA support for MTL,

- Enable 32 bit OAG formats for MTL.
- 0x200c is repurposed on MTL. Use a separate mux table to verify oa
  configs passed by user.
- Similar to ACM, OA/CS timestamp is mismatched on MTL. Add MTL to the
  WA.
- On MTL, gt->scratch was using stolen lmem. An MI_SRM to stolen lmem is
  hanging. Add a page in noa_wait BO to save/restore GPR registers for
  the noa_wait logic.

Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_gt_types.h |  6 ---
 drivers/gpu/drm/i915/i915_perf.c         | 49 ++++++++++++++++++------
 2 files changed, 38 insertions(+), 17 deletions(-)

Comments

Lucas De Marchi Nov. 29, 2022, 7:15 a.m. UTC | #1
On Mon, Nov 28, 2022 at 05:21:46PM -0800, Umesh Nerlige Ramappa wrote:
>As part of OA support for MTL,
>
>- Enable 32 bit OAG formats for MTL.
>- 0x200c is repurposed on MTL. Use a separate mux table to verify oa
>  configs passed by user.
>- Similar to ACM, OA/CS timestamp is mismatched on MTL. Add MTL to the
>  WA.
>- On MTL, gt->scratch was using stolen lmem. An MI_SRM to stolen lmem is
>  hanging. Add a page in noa_wait BO to save/restore GPR registers for
>  the noa_wait logic.

why are all these changes squashed in a single patch?

>
>Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
>---
> drivers/gpu/drm/i915/gt/intel_gt_types.h |  6 ---
> drivers/gpu/drm/i915/i915_perf.c         | 49 ++++++++++++++++++------
> 2 files changed, 38 insertions(+), 17 deletions(-)
>
>diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
>index c1d9cd255e06..13dffe0a3d20 100644
>--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
>+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
>@@ -296,12 +296,6 @@ enum intel_gt_scratch_field {
>
> 	/* 8 bytes */
> 	INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA = 256,
>-
>-	/* 6 * 8 bytes */
>-	INTEL_GT_SCRATCH_FIELD_PERF_CS_GPR = 2048,
>-
>-	/* 4 bytes */
>-	INTEL_GT_SCRATCH_FIELD_PERF_PREDICATE_RESULT_1 = 2096,
> };
>
> #endif /* __INTEL_GT_TYPES_H__ */
>diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
>index 00e09bb18b13..a735b9540113 100644
>--- a/drivers/gpu/drm/i915/i915_perf.c
>+++ b/drivers/gpu/drm/i915/i915_perf.c
>@@ -1842,8 +1842,7 @@ static u32 *save_restore_register(struct i915_perf_stream *stream, u32 *cs,
> 	for (d = 0; d < dword_count; d++) {
> 		*cs++ = cmd;
> 		*cs++ = i915_mmio_reg_offset(reg) + 4 * d;
>-		*cs++ = intel_gt_scratch_offset(stream->engine->gt,
>-						offset) + 4 * d;
>+		*cs++ = i915_ggtt_offset(stream->noa_wait) + offset + 4 * d;
> 		*cs++ = 0;
> 	}
>
>@@ -1876,7 +1875,13 @@ static int alloc_noa_wait(struct i915_perf_stream *stream)
> 					  MI_PREDICATE_RESULT_2_ENGINE(base) :
> 					  MI_PREDICATE_RESULT_1(RENDER_RING_BASE);
>
>-	bo = i915_gem_object_create_internal(i915, 4096);
>+	/*
>+	 * gt->scratch was being used to save/restore the GPR registers, but on
>+	 * MTL the scratch uses stolen lmem. An MI_SRM to this memory region
>+	 * causes an engine hang. Instead allocate an additional page here to

humn.. is it because of the pte being wrong?  "stolen lmem" in mtl is
still system memory... do we know why we'd need this change?

Lucas De Marchi


>+	 * save/restore GPR registers
>+	 */
>+	bo = i915_gem_object_create_internal(i915, 8192);
> 	if (IS_ERR(bo)) {
> 		drm_err(&i915->drm,
> 			"Failed to allocate NOA wait batchbuffer\n");
>@@ -1910,14 +1915,19 @@ static int alloc_noa_wait(struct i915_perf_stream *stream)
> 		goto err_unpin;
> 	}
>
>+	stream->noa_wait = vma;
>+
>+#define GPR_SAVE_OFFSET 4096
>+#define PREDICATE_SAVE_OFFSET 4160
>+
> 	/* Save registers. */
> 	for (i = 0; i < N_CS_GPR; i++)
> 		cs = save_restore_register(
> 			stream, cs, true /* save */, CS_GPR(i),
>-			INTEL_GT_SCRATCH_FIELD_PERF_CS_GPR + 8 * i, 2);
>+			GPR_SAVE_OFFSET + 8 * i, 2);
> 	cs = save_restore_register(
> 		stream, cs, true /* save */, mi_predicate_result,
>-		INTEL_GT_SCRATCH_FIELD_PERF_PREDICATE_RESULT_1, 1);
>+		PREDICATE_SAVE_OFFSET, 1);
>
> 	/* First timestamp snapshot location. */
> 	ts0 = cs;
>@@ -2033,10 +2043,10 @@ static int alloc_noa_wait(struct i915_perf_stream *stream)
> 	for (i = 0; i < N_CS_GPR; i++)
> 		cs = save_restore_register(
> 			stream, cs, false /* restore */, CS_GPR(i),
>-			INTEL_GT_SCRATCH_FIELD_PERF_CS_GPR + 8 * i, 2);
>+			GPR_SAVE_OFFSET + 8 * i, 2);
> 	cs = save_restore_register(
> 		stream, cs, false /* restore */, mi_predicate_result,
>-		INTEL_GT_SCRATCH_FIELD_PERF_PREDICATE_RESULT_1, 1);
>+		PREDICATE_SAVE_OFFSET, 1);
>
> 	/* And return to the ring. */
> 	*cs++ = MI_BATCH_BUFFER_END;
>@@ -2046,7 +2056,6 @@ static int alloc_noa_wait(struct i915_perf_stream *stream)
> 	i915_gem_object_flush_map(bo);
> 	__i915_gem_object_release_map(bo);
>
>-	stream->noa_wait = vma;
> 	goto out_ww;
>
> err_unpin:
>@@ -3127,8 +3136,11 @@ get_sseu_config(struct intel_sseu *out_sseu,
>  */
> u32 i915_perf_oa_timestamp_frequency(struct drm_i915_private *i915)
> {
>-	/* Wa_18013179988:dg2 */
>-	if (IS_DG2(i915)) {
>+	/*
>+	 * Wa_18013179988:dg2
>+	 * Wa_14015846243:mtl
>+	 */
>+	if (IS_DG2(i915) || IS_METEORLAKE(i915)) {
> 		intel_wakeref_t wakeref;
> 		u32 reg, shift;
>
>@@ -4306,6 +4318,17 @@ static const struct i915_range gen12_oa_mux_regs[] = {
> 	{}
> };
>
>+/*
>+ * Ref: 14010536224:
>+ * 0x20cc is repurposed on MTL, so use a separate array for MTL.
>+ */
>+static const struct i915_range mtl_oa_mux_regs[] = {
>+	{ .start = 0x0d00, .end = 0x0d04 },	/* RPM_CONFIG[0-1] */
>+	{ .start = 0x0d0c, .end = 0x0d2c },	/* NOA_CONFIG[0-8] */
>+	{ .start = 0x9840, .end = 0x9840 },	/* GDT_CHICKEN_BITS */
>+	{ .start = 0x9884, .end = 0x9888 },	/* NOA_WRITE */
>+};
>+
> static bool gen7_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr)
> {
> 	return reg_in_range_table(addr, gen7_oa_b_counters);
>@@ -4349,7 +4372,10 @@ static bool xehp_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr)
>
> static bool gen12_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
> {
>-	return reg_in_range_table(addr, gen12_oa_mux_regs);
>+	if (IS_METEORLAKE(perf->i915))
>+		return reg_in_range_table(addr, mtl_oa_mux_regs);
>+	else
>+		return reg_in_range_table(addr, gen12_oa_mux_regs);
> }
>
> static u32 mask_reg_value(u32 reg, u32 val)
>@@ -4746,6 +4772,7 @@ static void oa_init_supported_formats(struct i915_perf *perf)
> 		break;
>
> 	case INTEL_DG2:
>+	case INTEL_METEORLAKE:
> 		oa_format_add(perf, I915_OAR_FORMAT_A32u40_A4u32_B8_C8);
> 		oa_format_add(perf, I915_OA_FORMAT_A24u40_A14u32_B8_C8);
> 		break;
>-- 
>2.36.1
>
Umesh Nerlige Ramappa Nov. 29, 2022, 6:40 p.m. UTC | #2
On Mon, Nov 28, 2022 at 11:15:52PM -0800, Lucas De Marchi wrote:
>On Mon, Nov 28, 2022 at 05:21:46PM -0800, Umesh Nerlige Ramappa wrote:
>>As part of OA support for MTL,
>>
>>- Enable 32 bit OAG formats for MTL.
>>- 0x200c is repurposed on MTL. Use a separate mux table to verify oa
>> configs passed by user.
>>- Similar to ACM, OA/CS timestamp is mismatched on MTL. Add MTL to the
>> WA.
>>- On MTL, gt->scratch was using stolen lmem. An MI_SRM to stolen lmem is
>> hanging. Add a page in noa_wait BO to save/restore GPR registers for
>> the noa_wait logic.
>
>why are all these changes squashed in a single patch?

I don't have a good reason other than we had it this way to enable OA 
for MTL. I can break them into separate patches in the next revision.

>
>>
>>Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
>>---
>>drivers/gpu/drm/i915/gt/intel_gt_types.h |  6 ---
>>drivers/gpu/drm/i915/i915_perf.c         | 49 ++++++++++++++++++------
>>2 files changed, 38 insertions(+), 17 deletions(-)
>>
>>diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
>>index c1d9cd255e06..13dffe0a3d20 100644
>>--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
>>+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
>>@@ -296,12 +296,6 @@ enum intel_gt_scratch_field {
>>
>>	/* 8 bytes */
>>	INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA = 256,
>>-
>>-	/* 6 * 8 bytes */
>>-	INTEL_GT_SCRATCH_FIELD_PERF_CS_GPR = 2048,
>>-
>>-	/* 4 bytes */
>>-	INTEL_GT_SCRATCH_FIELD_PERF_PREDICATE_RESULT_1 = 2096,
>>};
>>
>>#endif /* __INTEL_GT_TYPES_H__ */
>>diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
>>index 00e09bb18b13..a735b9540113 100644
>>--- a/drivers/gpu/drm/i915/i915_perf.c
>>+++ b/drivers/gpu/drm/i915/i915_perf.c
>>@@ -1842,8 +1842,7 @@ static u32 *save_restore_register(struct i915_perf_stream *stream, u32 *cs,
>>	for (d = 0; d < dword_count; d++) {
>>		*cs++ = cmd;
>>		*cs++ = i915_mmio_reg_offset(reg) + 4 * d;
>>-		*cs++ = intel_gt_scratch_offset(stream->engine->gt,
>>-						offset) + 4 * d;
>>+		*cs++ = i915_ggtt_offset(stream->noa_wait) + offset + 4 * d;
>>		*cs++ = 0;
>>	}
>>
>>@@ -1876,7 +1875,13 @@ static int alloc_noa_wait(struct i915_perf_stream *stream)
>>					  MI_PREDICATE_RESULT_2_ENGINE(base) :
>>					  MI_PREDICATE_RESULT_1(RENDER_RING_BASE);
>>
>>-	bo = i915_gem_object_create_internal(i915, 4096);
>>+	/*
>>+	 * gt->scratch was being used to save/restore the GPR registers, but on
>>+	 * MTL the scratch uses stolen lmem. An MI_SRM to this memory region
>>+	 * causes an engine hang. Instead allocate an additional page here to
>
>humn.. is it because of the pte being wrong?"stolen lmem" in mtl is
>still system memory... do we know why we'd need this change?

This memory is accessed by the CS to save/restore some registers and 
that was causing a hang, so I guess mapping this memory to ggtt did not 
work at the time. I am not sure if this was fixed later, but using a 
memory that's owned by OA seems to work fine.

Thanks,
Umesh
>
>Lucas De Marchi
>
>
>>+	 * save/restore GPR registers
>>+	 */
>>+	bo = i915_gem_object_create_internal(i915, 8192);
>>	if (IS_ERR(bo)) {
>>		drm_err(&i915->drm,
>>			"Failed to allocate NOA wait batchbuffer\n");
>>@@ -1910,14 +1915,19 @@ static int alloc_noa_wait(struct i915_perf_stream *stream)
>>		goto err_unpin;
>>	}
>>
>>+	stream->noa_wait = vma;
>>+
>>+#define GPR_SAVE_OFFSET 4096
>>+#define PREDICATE_SAVE_OFFSET 4160
>>+
>>	/* Save registers. */
>>	for (i = 0; i < N_CS_GPR; i++)
>>		cs = save_restore_register(
>>			stream, cs, true /* save */, CS_GPR(i),
>>-			INTEL_GT_SCRATCH_FIELD_PERF_CS_GPR + 8 * i, 2);
>>+			GPR_SAVE_OFFSET + 8 * i, 2);
>>	cs = save_restore_register(
>>		stream, cs, true /* save */, mi_predicate_result,
>>-		INTEL_GT_SCRATCH_FIELD_PERF_PREDICATE_RESULT_1, 1);
>>+		PREDICATE_SAVE_OFFSET, 1);
>>
>>	/* First timestamp snapshot location. */
>>	ts0 = cs;
>>@@ -2033,10 +2043,10 @@ static int alloc_noa_wait(struct i915_perf_stream *stream)
>>	for (i = 0; i < N_CS_GPR; i++)
>>		cs = save_restore_register(
>>			stream, cs, false /* restore */, CS_GPR(i),
>>-			INTEL_GT_SCRATCH_FIELD_PERF_CS_GPR + 8 * i, 2);
>>+			GPR_SAVE_OFFSET + 8 * i, 2);
>>	cs = save_restore_register(
>>		stream, cs, false /* restore */, mi_predicate_result,
>>-		INTEL_GT_SCRATCH_FIELD_PERF_PREDICATE_RESULT_1, 1);
>>+		PREDICATE_SAVE_OFFSET, 1);
>>
>>	/* And return to the ring. */
>>	*cs++ = MI_BATCH_BUFFER_END;
>>@@ -2046,7 +2056,6 @@ static int alloc_noa_wait(struct i915_perf_stream *stream)
>>	i915_gem_object_flush_map(bo);
>>	__i915_gem_object_release_map(bo);
>>
>>-	stream->noa_wait = vma;
>>	goto out_ww;
>>
>>err_unpin:
>>@@ -3127,8 +3136,11 @@ get_sseu_config(struct intel_sseu *out_sseu,
>> */
>>u32 i915_perf_oa_timestamp_frequency(struct drm_i915_private *i915)
>>{
>>-	/* Wa_18013179988:dg2 */
>>-	if (IS_DG2(i915)) {
>>+	/*
>>+	 * Wa_18013179988:dg2
>>+	 * Wa_14015846243:mtl
>>+	 */
>>+	if (IS_DG2(i915) || IS_METEORLAKE(i915)) {
>>		intel_wakeref_t wakeref;
>>		u32 reg, shift;
>>
>>@@ -4306,6 +4318,17 @@ static const struct i915_range gen12_oa_mux_regs[] = {
>>	{}
>>};
>>
>>+/*
>>+ * Ref: 14010536224:
>>+ * 0x20cc is repurposed on MTL, so use a separate array for MTL.
>>+ */
>>+static const struct i915_range mtl_oa_mux_regs[] = {
>>+	{ .start = 0x0d00, .end = 0x0d04 },	/* RPM_CONFIG[0-1] */
>>+	{ .start = 0x0d0c, .end = 0x0d2c },	/* NOA_CONFIG[0-8] */
>>+	{ .start = 0x9840, .end = 0x9840 },	/* GDT_CHICKEN_BITS */
>>+	{ .start = 0x9884, .end = 0x9888 },	/* NOA_WRITE */
>>+};
>>+
>>static bool gen7_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr)
>>{
>>	return reg_in_range_table(addr, gen7_oa_b_counters);
>>@@ -4349,7 +4372,10 @@ static bool xehp_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr)
>>
>>static bool gen12_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
>>{
>>-	return reg_in_range_table(addr, gen12_oa_mux_regs);
>>+	if (IS_METEORLAKE(perf->i915))
>>+		return reg_in_range_table(addr, mtl_oa_mux_regs);
>>+	else
>>+		return reg_in_range_table(addr, gen12_oa_mux_regs);
>>}
>>
>>static u32 mask_reg_value(u32 reg, u32 val)
>>@@ -4746,6 +4772,7 @@ static void oa_init_supported_formats(struct i915_perf *perf)
>>		break;
>>
>>	case INTEL_DG2:
>>+	case INTEL_METEORLAKE:
>>		oa_format_add(perf, I915_OAR_FORMAT_A32u40_A4u32_B8_C8);
>>		oa_format_add(perf, I915_OA_FORMAT_A24u40_A14u32_B8_C8);
>>		break;
>>-- 
>>2.36.1
>>
Dixit, Ashutosh Nov. 30, 2022, 1:17 a.m. UTC | #3
On Mon, 28 Nov 2022 17:21:46 -0800, Umesh Nerlige Ramappa wrote:
>

Hi Umesh,

Overall looks ok, just a couple of questions below. Splitting the patches
would be nice and easier to review, but I'm almost done with this one ;-)

> @@ -1876,7 +1875,13 @@ static int alloc_noa_wait(struct i915_perf_stream *stream)
>					  MI_PREDICATE_RESULT_2_ENGINE(base) :
>					  MI_PREDICATE_RESULT_1(RENDER_RING_BASE);
>
> -	bo = i915_gem_object_create_internal(i915, 4096);
> +	/*
> +	 * gt->scratch was being used to save/restore the GPR registers, but on
> +	 * MTL the scratch uses stolen lmem. An MI_SRM to this memory region
> +	 * causes an engine hang. Instead allocate an additional page here to
> +	 * save/restore GPR registers
> +	 */
> +	bo = i915_gem_object_create_internal(i915, 8192);

Do we know how much space was used in stream->noa_wait originally? Anyway
allocating an additional page is not an issue so ok to skip the question.

> @@ -4746,6 +4772,7 @@ static void oa_init_supported_formats(struct i915_perf *perf)
>		break;
>
>	case INTEL_DG2:
> +	case INTEL_METEORLAKE:
>		oa_format_add(perf, I915_OAR_FORMAT_A32u40_A4u32_B8_C8);
>		oa_format_add(perf, I915_OA_FORMAT_A24u40_A14u32_B8_C8);

Do these formats also need to be added?

		oa_format_add(perf, I915_OAR_FORMAT_A32u40_A4u32_B8_C8);
		oa_format_add(perf, I915_OA_FORMAT_A24u40_A14u32_B8_C8);
		oa_format_add(perf, I915_OAR_FORMAT_A36u64_B8_C8);
		oa_format_add(perf, I915_OA_FORMAT_A38u64_R2u64_B8_C8);

Or these are not considered OAG formats?

>		break;

Thanks.
--
Ashutosh
Dixit, Ashutosh Nov. 30, 2022, 1:47 a.m. UTC | #4
On Tue, 29 Nov 2022 17:17:13 -0800, Dixit, Ashutosh wrote:
>
> > @@ -4746,6 +4772,7 @@ static void oa_init_supported_formats(struct i915_perf *perf)
> >		break;
> >
> >	case INTEL_DG2:
> > +	case INTEL_METEORLAKE:
> >		oa_format_add(perf, I915_OAR_FORMAT_A32u40_A4u32_B8_C8);
> >		oa_format_add(perf, I915_OA_FORMAT_A24u40_A14u32_B8_C8);
>
> Do these formats also need to be added?

Sorry!

>		oa_format_add(perf, I915_OAR_FORMAT_A32u40_A4u32_B8_C8);
>		oa_format_add(perf, I915_OA_FORMAT_A24u40_A14u32_B8_C8);

These two are the same as above.

>		oa_format_add(perf, I915_OAR_FORMAT_A36u64_B8_C8);
>		oa_format_add(perf, I915_OA_FORMAT_A38u64_R2u64_B8_C8);

And it seems these are not 32 bit (where 32 bit refers to the header
size).

> Or these are not considered OAG formats?

So it's ok, no changes needed here.

>
> >		break;

> Thanks.
> --
> Ashutosh
Dixit, Ashutosh Nov. 30, 2022, 1:51 a.m. UTC | #5
On Mon, 28 Nov 2022 17:21:46 -0800, Umesh Nerlige Ramappa wrote:
>
> +/*
> + * Ref: 14010536224:
> + * 0x20cc is repurposed on MTL, so use a separate array for MTL.

Wondering if it was WAIT_FOR_RC6_EXIT (seen in gen12_oa_mux_regs) which
moved elsewhere and if that needs to be added to the array below too?

> + */
> +static const struct i915_range mtl_oa_mux_regs[] = {
> +	{ .start = 0x0d00, .end = 0x0d04 },	/* RPM_CONFIG[0-1] */
> +	{ .start = 0x0d0c, .end = 0x0d2c },	/* NOA_CONFIG[0-8] */
> +	{ .start = 0x9840, .end = 0x9840 },	/* GDT_CHICKEN_BITS */
> +	{ .start = 0x9884, .end = 0x9888 },	/* NOA_WRITE */
> +};
> +
>  static bool gen7_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr)
>  {
>	return reg_in_range_table(addr, gen7_oa_b_counters);
> @@ -4349,7 +4372,10 @@ static bool xehp_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr)
>
>  static bool gen12_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
>  {
> -	return reg_in_range_table(addr, gen12_oa_mux_regs);
> +	if (IS_METEORLAKE(perf->i915))
> +		return reg_in_range_table(addr, mtl_oa_mux_regs);
> +	else
> +		return reg_in_range_table(addr, gen12_oa_mux_regs);

But otherwise this is:

Reviewed-by: Ashutosh Dixit <ashutosh.dixit@intel.com>

If you decide to split the patches, please add my R-b on all the split patches.
Umesh Nerlige Ramappa Nov. 30, 2022, 8 p.m. UTC | #6
On Tue, Nov 29, 2022 at 05:51:13PM -0800, Dixit, Ashutosh wrote:
>On Mon, 28 Nov 2022 17:21:46 -0800, Umesh Nerlige Ramappa wrote:
>>
>> +/*
>> + * Ref: 14010536224:
>> + * 0x20cc is repurposed on MTL, so use a separate array for MTL.
>
>Wondering if it was WAIT_FOR_RC6_EXIT (seen in gen12_oa_mux_regs) which
>moved elsewhere and if that needs to be added to the array below too?

WAIT_FOR_RC6_EXIT (0x20cc) moved elsewhere so it should be "removed" 
from mtl oa mux array.

>
>> + */
>> +static const struct i915_range mtl_oa_mux_regs[] = {
>> +	{ .start = 0x0d00, .end = 0x0d04 },	/* RPM_CONFIG[0-1] */
>> +	{ .start = 0x0d0c, .end = 0x0d2c },	/* NOA_CONFIG[0-8] */
>> +	{ .start = 0x9840, .end = 0x9840 },	/* GDT_CHICKEN_BITS */
>> +	{ .start = 0x9884, .end = 0x9888 },	/* NOA_WRITE */
>> +};
>> +
>>  static bool gen7_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr)
>>  {
>>	return reg_in_range_table(addr, gen7_oa_b_counters);
>> @@ -4349,7 +4372,10 @@ static bool xehp_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr)
>>
>>  static bool gen12_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
>>  {
>> -	return reg_in_range_table(addr, gen12_oa_mux_regs);
>> +	if (IS_METEORLAKE(perf->i915))
>> +		return reg_in_range_table(addr, mtl_oa_mux_regs);
>> +	else
>> +		return reg_in_range_table(addr, gen12_oa_mux_regs);
>
>But otherwise this is:
>
>Reviewed-by: Ashutosh Dixit <ashutosh.dixit@intel.com>

I will break them into separate patches though. If the diff is 
identical, I will carry over your R-b on the split patches. Please let 
me know if that's a concern.

Thanks,
Umesh
>
>If you decide to split the patches, please add my R-b on all the split patches.
Dixit, Ashutosh Nov. 30, 2022, 8:14 p.m. UTC | #7
On Wed, 30 Nov 2022 12:00:57 -0800, Umesh Nerlige Ramappa wrote:
>
> On Tue, Nov 29, 2022 at 05:51:13PM -0800, Dixit, Ashutosh wrote:
> > On Mon, 28 Nov 2022 17:21:46 -0800, Umesh Nerlige Ramappa wrote:
> >>
> >> +/*
> >> + * Ref: 14010536224:
> >> + * 0x20cc is repurposed on MTL, so use a separate array for MTL.
> >
> > Wondering if it was WAIT_FOR_RC6_EXIT (seen in gen12_oa_mux_regs) which
> > moved elsewhere and if that needs to be added to the array below too?
>
> WAIT_FOR_RC6_EXIT (0x20cc) moved elsewhere so it should be "removed" from
> mtl oa mux array.

What I was saying was let's say WAIT_FOR_RC6_EXIT moved to 0xc0ffee so now
should 0xc0ffee be added to mtl_oa_mux_regs?

>
> >
> >> + */
> >> +static const struct i915_range mtl_oa_mux_regs[] = {
> >> +	{ .start = 0x0d00, .end = 0x0d04 },	/* RPM_CONFIG[0-1] */
> >> +	{ .start = 0x0d0c, .end = 0x0d2c },	/* NOA_CONFIG[0-8] */
> >> +	{ .start = 0x9840, .end = 0x9840 },	/* GDT_CHICKEN_BITS */
> >> +	{ .start = 0x9884, .end = 0x9888 },	/* NOA_WRITE */
> >> +};
> >> +
> >>  static bool gen7_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr)
> >>  {
> >>	return reg_in_range_table(addr, gen7_oa_b_counters);
> >> @@ -4349,7 +4372,10 @@ static bool xehp_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr)
> >>
> >>  static bool gen12_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
> >>  {
> >> -	return reg_in_range_table(addr, gen12_oa_mux_regs);
> >> +	if (IS_METEORLAKE(perf->i915))
> >> +		return reg_in_range_table(addr, mtl_oa_mux_regs);
> >> +	else
> >> +		return reg_in_range_table(addr, gen12_oa_mux_regs);
> >
> > But otherwise this is:
> >
> > Reviewed-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
>
> I will break them into separate patches though. If the diff is identical, I
> will carry over your R-b on the split patches. Please let me know if that's
> a concern.

No I can quickly review again anyway.

> > If you decide to split the patches, please add my R-b on all the split patches.

Thanks.
--
Ashutosh
Umesh Nerlige Ramappa Nov. 30, 2022, 11:39 p.m. UTC | #8
On Wed, Nov 30, 2022 at 12:14:20PM -0800, Dixit, Ashutosh wrote:
>On Wed, 30 Nov 2022 12:00:57 -0800, Umesh Nerlige Ramappa wrote:
>>
>> On Tue, Nov 29, 2022 at 05:51:13PM -0800, Dixit, Ashutosh wrote:
>> > On Mon, 28 Nov 2022 17:21:46 -0800, Umesh Nerlige Ramappa wrote:
>> >>
>> >> +/*
>> >> + * Ref: 14010536224:
>> >> + * 0x20cc is repurposed on MTL, so use a separate array for MTL.
>> >
>> > Wondering if it was WAIT_FOR_RC6_EXIT (seen in gen12_oa_mux_regs) which
>> > moved elsewhere and if that needs to be added to the array below too?
>>
>> WAIT_FOR_RC6_EXIT (0x20cc) moved elsewhere so it should be "removed" from
>> mtl oa mux array.
>
>What I was saying was let's say WAIT_FOR_RC6_EXIT moved to 0xc0ffee so now
>should 0xc0ffee be added to mtl_oa_mux_regs?

oh, sorry, I misread that.

I looked for WAIT_FOR_RC6_EXIT in the bspec and did not find it defined 
for MTL, so it's dropped completely. If you could confirm, that would be 
great.

>
>>
>> >
>> >> + */
>> >> +static const struct i915_range mtl_oa_mux_regs[] = {
>> >> +	{ .start = 0x0d00, .end = 0x0d04 },	/* RPM_CONFIG[0-1] */
>> >> +	{ .start = 0x0d0c, .end = 0x0d2c },	/* NOA_CONFIG[0-8] */
>> >> +	{ .start = 0x9840, .end = 0x9840 },	/* GDT_CHICKEN_BITS */
>> >> +	{ .start = 0x9884, .end = 0x9888 },	/* NOA_WRITE */
>> >> +};
>> >> +
>> >>  static bool gen7_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr)
>> >>  {
>> >>	return reg_in_range_table(addr, gen7_oa_b_counters);
>> >> @@ -4349,7 +4372,10 @@ static bool xehp_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr)
>> >>
>> >>  static bool gen12_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
>> >>  {
>> >> -	return reg_in_range_table(addr, gen12_oa_mux_regs);
>> >> +	if (IS_METEORLAKE(perf->i915))
>> >> +		return reg_in_range_table(addr, mtl_oa_mux_regs);
>> >> +	else
>> >> +		return reg_in_range_table(addr, gen12_oa_mux_regs);
>> >
>> > But otherwise this is:
>> >
>> > Reviewed-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
>>
>> I will break them into separate patches though. If the diff is identical, I
>> will carry over your R-b on the split patches. Please let me know if that's
>> a concern.
>
>No I can quickly review again anyway.

got it, will not add the R-bs.

Thanks,
Umesh

>
>> > If you decide to split the patches, please add my R-b on all the split patches.
>
>Thanks.
>--
>Ashutosh
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h
index c1d9cd255e06..13dffe0a3d20 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
@@ -296,12 +296,6 @@  enum intel_gt_scratch_field {
 
 	/* 8 bytes */
 	INTEL_GT_SCRATCH_FIELD_COHERENTL3_WA = 256,
-
-	/* 6 * 8 bytes */
-	INTEL_GT_SCRATCH_FIELD_PERF_CS_GPR = 2048,
-
-	/* 4 bytes */
-	INTEL_GT_SCRATCH_FIELD_PERF_PREDICATE_RESULT_1 = 2096,
 };
 
 #endif /* __INTEL_GT_TYPES_H__ */
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 00e09bb18b13..a735b9540113 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1842,8 +1842,7 @@  static u32 *save_restore_register(struct i915_perf_stream *stream, u32 *cs,
 	for (d = 0; d < dword_count; d++) {
 		*cs++ = cmd;
 		*cs++ = i915_mmio_reg_offset(reg) + 4 * d;
-		*cs++ = intel_gt_scratch_offset(stream->engine->gt,
-						offset) + 4 * d;
+		*cs++ = i915_ggtt_offset(stream->noa_wait) + offset + 4 * d;
 		*cs++ = 0;
 	}
 
@@ -1876,7 +1875,13 @@  static int alloc_noa_wait(struct i915_perf_stream *stream)
 					  MI_PREDICATE_RESULT_2_ENGINE(base) :
 					  MI_PREDICATE_RESULT_1(RENDER_RING_BASE);
 
-	bo = i915_gem_object_create_internal(i915, 4096);
+	/*
+	 * gt->scratch was being used to save/restore the GPR registers, but on
+	 * MTL the scratch uses stolen lmem. An MI_SRM to this memory region
+	 * causes an engine hang. Instead allocate an additional page here to
+	 * save/restore GPR registers
+	 */
+	bo = i915_gem_object_create_internal(i915, 8192);
 	if (IS_ERR(bo)) {
 		drm_err(&i915->drm,
 			"Failed to allocate NOA wait batchbuffer\n");
@@ -1910,14 +1915,19 @@  static int alloc_noa_wait(struct i915_perf_stream *stream)
 		goto err_unpin;
 	}
 
+	stream->noa_wait = vma;
+
+#define GPR_SAVE_OFFSET 4096
+#define PREDICATE_SAVE_OFFSET 4160
+
 	/* Save registers. */
 	for (i = 0; i < N_CS_GPR; i++)
 		cs = save_restore_register(
 			stream, cs, true /* save */, CS_GPR(i),
-			INTEL_GT_SCRATCH_FIELD_PERF_CS_GPR + 8 * i, 2);
+			GPR_SAVE_OFFSET + 8 * i, 2);
 	cs = save_restore_register(
 		stream, cs, true /* save */, mi_predicate_result,
-		INTEL_GT_SCRATCH_FIELD_PERF_PREDICATE_RESULT_1, 1);
+		PREDICATE_SAVE_OFFSET, 1);
 
 	/* First timestamp snapshot location. */
 	ts0 = cs;
@@ -2033,10 +2043,10 @@  static int alloc_noa_wait(struct i915_perf_stream *stream)
 	for (i = 0; i < N_CS_GPR; i++)
 		cs = save_restore_register(
 			stream, cs, false /* restore */, CS_GPR(i),
-			INTEL_GT_SCRATCH_FIELD_PERF_CS_GPR + 8 * i, 2);
+			GPR_SAVE_OFFSET + 8 * i, 2);
 	cs = save_restore_register(
 		stream, cs, false /* restore */, mi_predicate_result,
-		INTEL_GT_SCRATCH_FIELD_PERF_PREDICATE_RESULT_1, 1);
+		PREDICATE_SAVE_OFFSET, 1);
 
 	/* And return to the ring. */
 	*cs++ = MI_BATCH_BUFFER_END;
@@ -2046,7 +2056,6 @@  static int alloc_noa_wait(struct i915_perf_stream *stream)
 	i915_gem_object_flush_map(bo);
 	__i915_gem_object_release_map(bo);
 
-	stream->noa_wait = vma;
 	goto out_ww;
 
 err_unpin:
@@ -3127,8 +3136,11 @@  get_sseu_config(struct intel_sseu *out_sseu,
  */
 u32 i915_perf_oa_timestamp_frequency(struct drm_i915_private *i915)
 {
-	/* Wa_18013179988:dg2 */
-	if (IS_DG2(i915)) {
+	/*
+	 * Wa_18013179988:dg2
+	 * Wa_14015846243:mtl
+	 */
+	if (IS_DG2(i915) || IS_METEORLAKE(i915)) {
 		intel_wakeref_t wakeref;
 		u32 reg, shift;
 
@@ -4306,6 +4318,17 @@  static const struct i915_range gen12_oa_mux_regs[] = {
 	{}
 };
 
+/*
+ * Ref: 14010536224:
+ * 0x20cc is repurposed on MTL, so use a separate array for MTL.
+ */
+static const struct i915_range mtl_oa_mux_regs[] = {
+	{ .start = 0x0d00, .end = 0x0d04 },	/* RPM_CONFIG[0-1] */
+	{ .start = 0x0d0c, .end = 0x0d2c },	/* NOA_CONFIG[0-8] */
+	{ .start = 0x9840, .end = 0x9840 },	/* GDT_CHICKEN_BITS */
+	{ .start = 0x9884, .end = 0x9888 },	/* NOA_WRITE */
+};
+
 static bool gen7_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr)
 {
 	return reg_in_range_table(addr, gen7_oa_b_counters);
@@ -4349,7 +4372,10 @@  static bool xehp_is_valid_b_counter_addr(struct i915_perf *perf, u32 addr)
 
 static bool gen12_is_valid_mux_addr(struct i915_perf *perf, u32 addr)
 {
-	return reg_in_range_table(addr, gen12_oa_mux_regs);
+	if (IS_METEORLAKE(perf->i915))
+		return reg_in_range_table(addr, mtl_oa_mux_regs);
+	else
+		return reg_in_range_table(addr, gen12_oa_mux_regs);
 }
 
 static u32 mask_reg_value(u32 reg, u32 val)
@@ -4746,6 +4772,7 @@  static void oa_init_supported_formats(struct i915_perf *perf)
 		break;
 
 	case INTEL_DG2:
+	case INTEL_METEORLAKE:
 		oa_format_add(perf, I915_OAR_FORMAT_A32u40_A4u32_B8_C8);
 		oa_format_add(perf, I915_OA_FORMAT_A24u40_A14u32_B8_C8);
 		break;