diff mbox series

[12/19] drm/i915/perf: Parse 64bit report header formats correctly

Message ID 20220823204155.8178-13-umesh.nerlige.ramappa@intel.com (mailing list archive)
State New, archived
Headers show
Series Add DG2 OA support | expand

Commit Message

Umesh Nerlige Ramappa Aug. 23, 2022, 8:41 p.m. UTC
Now that OA formats come in flavor of 64 bit reports, the report header
has 64 bit report-id, timestamp, context-id and gpu-ticks fields. When
filtering these reports, use the right width for these fields.

v2: Let compiler decide on inline (Jani)

Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
---
 drivers/gpu/drm/i915/i915_perf.c       | 101 ++++++++++++++++++++-----
 drivers/gpu/drm/i915/i915_perf_types.h |   6 ++
 2 files changed, 86 insertions(+), 21 deletions(-)

Comments

Ashutosh Dixit Sept. 16, 2022, 12:47 a.m. UTC | #1
On Tue, 23 Aug 2022 13:41:48 -0700, Umesh Nerlige Ramappa wrote:
>

Hi Umesh,

> @@ -740,23 +802,19 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
>		u8 *report = oa_buf_base + head;
>		u32 *report32 = (void *)report;
>		u32 ctx_id;
> -		u32 reason;
> +		u64 reason;
>
>		/*
>		 * The reason field includes flags identifying what
>		 * triggered this specific report (mostly timer
>		 * triggered or e.g. due to a context switch).
>		 *
> -		 * This field is never expected to be zero so we can
> -		 * check that the report isn't invalid before copying
> -		 * it to userspace...
> +		 * In MMIO triggered reports, some platforms do not set the
> +		 * reason bit in this field and it is valid to have a reason
> +		 * field of zero.
>		 */
> -		reason = ((report32[0] >> OAREPORT_REASON_SHIFT) &
> -			  (GRAPHICS_VER(stream->perf->i915) == 12 ?
> -			   OAREPORT_REASON_MASK_EXTENDED :
> -			   OAREPORT_REASON_MASK));
> -
> -		ctx_id = report32[2] & stream->specific_ctx_id_mask;
> +		reason = oa_report_reason(stream, report);
> +		ctx_id = oa_context_id(stream, report32);
>
>		/*
>		 * Squash whatever is in the CTX_ID field if it's marked as
> @@ -766,9 +824,10 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
>		 * Note: that we don't clear the valid_ctx_bit so userspace can
>		 * understand that the ID has been squashed by the kernel.
>		 */
> -		if (!(report32[0] & stream->perf->gen8_valid_ctx_bit) &&
> -		    GRAPHICS_VER(stream->perf->i915) <= 11)
> -			ctx_id = report32[2] = INVALID_CTX_ID;
> +		if (oa_report_ctx_invalid(stream, report)) {
> +			ctx_id = INVALID_CTX_ID;
> +			oa_context_id_squash(stream, report32);
> +		}
>
>		/*
>		 * NB: For Gen 8 the OA unit no longer supports clock gating
> @@ -812,7 +871,7 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
>			 */
>			if (stream->ctx &&
>			    stream->specific_ctx_id != ctx_id) {
> -				report32[2] = INVALID_CTX_ID;
> +				oa_context_id_squash(stream, report32);
>			}
>
>			ret = append_oa_sample(stream, buf, count, offset,
> @@ -824,11 +883,11 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream,
>		}
>
>		/*
> -		 * Clear out the first 2 dword as a mean to detect unlanded
> +		 * Clear out the report id and timestamp as a means to detect unlanded
>		 * reports.
>		 */
> -		report32[0] = 0;
> -		report32[1] = 0;
> +		oa_report_id_clear(stream, report32);
> +		oa_timestamp_clear(stream, report32);

Because we now have these new functions, why do we now need two pointers
report and report32 pointing to the same location? I think we can just have
a single 'void *report' which we can pass into all these functions,
correct?

With this change, this is:

Reviewed-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
diff mbox series

Patch

diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 9e455bd3bce5..167e7355980a 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -324,8 +324,8 @@  static const struct i915_oa_format oa_formats[I915_OA_FORMAT_MAX] = {
 	[I915_OA_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256 },
 	[I915_OAR_FORMAT_A32u40_A4u32_B8_C8]    = { 5, 256 },
 	[I915_OA_FORMAT_A24u40_A14u32_B8_C8]    = { 5, 256 },
-	[I915_OAR_FORMAT_A36u64_B8_C8]		= { 1, 384 },
-	[I915_OA_FORMAT_A38u64_R2u64_B8_C8]	= { 1, 448 },
+	[I915_OAR_FORMAT_A36u64_B8_C8]		= { 1, 384, HDR_64_BIT },
+	[I915_OA_FORMAT_A38u64_R2u64_B8_C8]	= { 1, 448, HDR_64_BIT },
 };
 
 #define SAMPLE_OA_REPORT      (1<<0)
@@ -456,6 +456,67 @@  static u32 gen7_oa_hw_tail_read(struct i915_perf_stream *stream)
 	return oastatus1 & GEN7_OASTATUS1_TAIL_MASK;
 }
 
+#define oa_report_header_64bit(__s) \
+	((__s)->oa_buffer.format->header == HDR_64_BIT)
+
+static u64 oa_report_id(struct i915_perf_stream *stream, void *report)
+{
+	return oa_report_header_64bit(stream) ? *(u64 *)report : *(u32 *)report;
+}
+
+static u64 oa_report_reason(struct i915_perf_stream *stream, void *report)
+{
+	return (oa_report_id(stream, report) >> OAREPORT_REASON_SHIFT) &
+	       (GRAPHICS_VER(stream->perf->i915) == 12 ?
+		OAREPORT_REASON_MASK_EXTENDED :
+		OAREPORT_REASON_MASK);
+}
+
+static void oa_report_id_clear(struct i915_perf_stream *stream, u32 *report)
+{
+	if (oa_report_header_64bit(stream))
+		*(u64 *)report = 0;
+	else
+		*report = 0;
+}
+
+static bool oa_report_ctx_invalid(struct i915_perf_stream *stream, void *report)
+{
+	return !(oa_report_id(stream, report) &
+	       stream->perf->gen8_valid_ctx_bit) &&
+	       GRAPHICS_VER(stream->perf->i915) <= 11;
+}
+
+static u64 oa_timestamp(struct i915_perf_stream *stream, void *report)
+{
+	return oa_report_header_64bit(stream) ?
+		*((u64 *)report + 1) :
+		*((u32 *)report + 1);
+}
+
+static void oa_timestamp_clear(struct i915_perf_stream *stream, u32 *report)
+{
+	if (oa_report_header_64bit(stream))
+		*(u64 *)&report[2] = 0;
+	else
+		report[1] = 0;
+}
+
+static u32 oa_context_id(struct i915_perf_stream *stream, u32 *report)
+{
+	u32 ctx_id = oa_report_header_64bit(stream) ? report[4] : report[2];
+
+	return ctx_id & stream->specific_ctx_id_mask;
+}
+
+static void oa_context_id_squash(struct i915_perf_stream *stream, u32 *report)
+{
+	if (oa_report_header_64bit(stream))
+		report[4] = INVALID_CTX_ID;
+	else
+		report[2] = INVALID_CTX_ID;
+}
+
 /**
  * oa_buffer_check_unlocked - check for data and update tail ptr state
  * @stream: i915 stream instance
@@ -545,9 +606,10 @@  static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream)
 		 * If not : (╯°□°)╯︵ ┻━┻
 		 */
 		while (_oa_taken(stream, tail, aged_tail) >= report_size) {
-			u32 *report32 = (void *)(stream->oa_buffer.vaddr + tail);
+			void *report = stream->oa_buffer.vaddr + tail;
 
-			if (report32[0] != 0 || report32[1] != 0)
+			if (oa_report_id(stream, report) ||
+			    oa_timestamp(stream, report))
 				break;
 
 			tail = _rewind_tail(stream, tail, report_size);
@@ -740,23 +802,19 @@  static int gen8_append_oa_reports(struct i915_perf_stream *stream,
 		u8 *report = oa_buf_base + head;
 		u32 *report32 = (void *)report;
 		u32 ctx_id;
-		u32 reason;
+		u64 reason;
 
 		/*
 		 * The reason field includes flags identifying what
 		 * triggered this specific report (mostly timer
 		 * triggered or e.g. due to a context switch).
 		 *
-		 * This field is never expected to be zero so we can
-		 * check that the report isn't invalid before copying
-		 * it to userspace...
+		 * In MMIO triggered reports, some platforms do not set the
+		 * reason bit in this field and it is valid to have a reason
+		 * field of zero.
 		 */
-		reason = ((report32[0] >> OAREPORT_REASON_SHIFT) &
-			  (GRAPHICS_VER(stream->perf->i915) == 12 ?
-			   OAREPORT_REASON_MASK_EXTENDED :
-			   OAREPORT_REASON_MASK));
-
-		ctx_id = report32[2] & stream->specific_ctx_id_mask;
+		reason = oa_report_reason(stream, report);
+		ctx_id = oa_context_id(stream, report32);
 
 		/*
 		 * Squash whatever is in the CTX_ID field if it's marked as
@@ -766,9 +824,10 @@  static int gen8_append_oa_reports(struct i915_perf_stream *stream,
 		 * Note: that we don't clear the valid_ctx_bit so userspace can
 		 * understand that the ID has been squashed by the kernel.
 		 */
-		if (!(report32[0] & stream->perf->gen8_valid_ctx_bit) &&
-		    GRAPHICS_VER(stream->perf->i915) <= 11)
-			ctx_id = report32[2] = INVALID_CTX_ID;
+		if (oa_report_ctx_invalid(stream, report)) {
+			ctx_id = INVALID_CTX_ID;
+			oa_context_id_squash(stream, report32);
+		}
 
 		/*
 		 * NB: For Gen 8 the OA unit no longer supports clock gating
@@ -812,7 +871,7 @@  static int gen8_append_oa_reports(struct i915_perf_stream *stream,
 			 */
 			if (stream->ctx &&
 			    stream->specific_ctx_id != ctx_id) {
-				report32[2] = INVALID_CTX_ID;
+				oa_context_id_squash(stream, report32);
 			}
 
 			ret = append_oa_sample(stream, buf, count, offset,
@@ -824,11 +883,11 @@  static int gen8_append_oa_reports(struct i915_perf_stream *stream,
 		}
 
 		/*
-		 * Clear out the first 2 dword as a mean to detect unlanded
+		 * Clear out the report id and timestamp as a means to detect unlanded
 		 * reports.
 		 */
-		report32[0] = 0;
-		report32[1] = 0;
+		oa_report_id_clear(stream, report32);
+		oa_timestamp_clear(stream, report32);
 	}
 
 	if (start_offset != *offset) {
diff --git a/drivers/gpu/drm/i915/i915_perf_types.h b/drivers/gpu/drm/i915/i915_perf_types.h
index e0c96b44eda8..68db5f94bc58 100644
--- a/drivers/gpu/drm/i915/i915_perf_types.h
+++ b/drivers/gpu/drm/i915/i915_perf_types.h
@@ -30,9 +30,15 @@  struct i915_vma;
 struct intel_context;
 struct intel_engine_cs;
 
+enum report_header {
+	HDR_32_BIT = 0,
+	HDR_64_BIT,
+};
+
 struct i915_oa_format {
 	u32 format;
 	int size;
+	enum report_header header;
 };
 
 struct i915_oa_reg {