diff mbox

[i-g-t,2/2] intel_gpu_overlay: Update for class:instance engine tracepoints

Message ID 20180605165051.29136-2-tvrtko.ursulin@linux.intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Tvrtko Ursulin June 5, 2018, 4:50 p.m. UTC
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

A minimal hack to parse the new tracepoint format and invent new "ring
ids" based on engine class and instance.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
---
 overlay/gpu-perf.c | 36 ++++++++++++++++++++++++++++--------
 1 file changed, 28 insertions(+), 8 deletions(-)

Comments

Lionel Landwerlin June 5, 2018, 5:14 p.m. UTC | #1
On 05/06/18 17:50, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>
> A minimal hack to parse the new tracepoint format and invent new "ring
> ids" based on engine class and instance.
>
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
> ---
>   overlay/gpu-perf.c | 36 ++++++++++++++++++++++++++++--------
>   1 file changed, 28 insertions(+), 8 deletions(-)
>
> diff --git a/overlay/gpu-perf.c b/overlay/gpu-perf.c
> index ea3480050ab9..e77125672088 100644
> --- a/overlay/gpu-perf.c
> +++ b/overlay/gpu-perf.c
> @@ -85,7 +85,8 @@ struct tracepoint {
>   
>   	int device_field;
>   	int ctx_field;
> -	int ring_field;
> +	int class_field;
> +	int instance_field;
>   	int seqno_field;
>   	int global_seqno_field;
>   	int plane_field;
> @@ -151,8 +152,10 @@ tracepoint_id(int tp_id)
>   				tp->device_field = f;
>   			} else if (!strcmp(tp->fields[f].name, "ctx")) {
>   				tp->ctx_field = f;
> -			} else if (!strcmp(tp->fields[f].name, "ring")) {
> -				tp->ring_field = f;
> +			} else if (!strcmp(tp->fields[f].name, "class")) {
> +				tp->class_field = f;
> +			} else if (!strcmp(tp->fields[f].name, "instance")) {
> +				tp->instance_field = f;

That looks good to me. We only support the most recent kernel?

>   			} else if (!strcmp(tp->fields[f].name, "seqno")) {
>   				tp->seqno_field = f;
>   			} else if (!strcmp(tp->fields[f].name, "global_seqno")) {
> @@ -175,6 +178,23 @@ tracepoint_id(int tp_id)
>   			     tracepoints[tp_id].fields[			\
>   				     tracepoints[tp_id].field_name##_field].offset))
>   
> +#define READ_TP_FIELD_U16(sample, tp_id, field_name)			\
> +	(*(const uint16_t *)((sample)->tracepoint_data +		\
> +			     tracepoints[tp_id].fields[			\
> +				     tracepoints[tp_id].field_name##_field].offset))
> +
> +#define GET_RING_ID(sample, tp_id) \
> +({ \
> +	unsigned char class, instance, ring; \
> +\
> +	class = READ_TP_FIELD_U16(sample, tp_id, class); \
> +	instance = READ_TP_FIELD_U16(sample, tp_id, instance); \
> +\
> +	ring = class * 2 + instance; \

Do you want to make it clear that we cannot have more than 2 instances 
per class?

> +\
> +	ring; \
> +})
> +
>   static int perf_tracepoint_open(struct gpu_perf *gp, int tp_id,
>   				int (*func)(struct gpu_perf *, const void *))
>   {
> @@ -313,7 +333,7 @@ static int request_add(struct gpu_perf *gp, const void *event)
>   	if (comm == NULL)
>   		return 0;
>   
> -	comm->nr_requests[READ_TP_FIELD_U32(sample, TP_GEM_REQUEST_ADD, ring)]++;
> +	comm->nr_requests[GET_RING_ID(sample, TP_GEM_REQUEST_ADD)]++;
>   	return 1;
>   }
>   
> @@ -329,7 +349,7 @@ static int ctx_switch(struct gpu_perf *gp, const void *event)
>   {
>   	const struct sample_event *sample = event;
>   
> -	gp->ctx_switch[READ_TP_FIELD_U32(sample, TP_GEM_RING_SWITCH_CONTEXT, ring)]++;
> +	gp->ctx_switch[GET_RING_ID(sample, TP_GEM_RING_SWITCH_CONTEXT)]++;
>   	return 1;
>   }
>   
> @@ -367,8 +387,8 @@ static int wait_begin(struct gpu_perf *gp, const void *event)
>   	wait->context = READ_TP_FIELD_U32(sample, TP_GEM_REQUEST_WAIT_BEGIN, ctx);
>   	wait->seqno = READ_TP_FIELD_U32(sample, TP_GEM_REQUEST_WAIT_BEGIN, seqno);
>   	wait->time = sample->time;
> -	wait->next = gp->wait[READ_TP_FIELD_U32(sample, TP_GEM_REQUEST_WAIT_BEGIN, ring)];
> -	gp->wait[READ_TP_FIELD_U32(sample, TP_GEM_REQUEST_WAIT_BEGIN, ring)] = wait;
> +	wait->next = gp->wait[GET_RING_ID(sample, TP_GEM_REQUEST_WAIT_BEGIN)];
> +	gp->wait[GET_RING_ID(sample, TP_GEM_REQUEST_WAIT_BEGIN)] = wait;
>   
>   	return 0;
>   }
> @@ -377,7 +397,7 @@ static int wait_end(struct gpu_perf *gp, const void *event)
>   {
>   	const struct sample_event *sample = event;
>   	struct gpu_perf_time *wait, **prev;
> -	uint32_t engine = READ_TP_FIELD_U32(sample, TP_GEM_REQUEST_WAIT_END, ring);
> +	uint32_t engine = GET_RING_ID(sample, TP_GEM_REQUEST_WAIT_END);
>   	uint32_t context = READ_TP_FIELD_U32(sample, TP_GEM_REQUEST_WAIT_END, ctx);
>   	uint32_t seqno = READ_TP_FIELD_U32(sample, TP_GEM_REQUEST_WAIT_END, seqno);
>
Chris Wilson June 5, 2018, 7:40 p.m. UTC | #2
Quoting Lionel Landwerlin (2018-06-05 18:14:58)
> On 05/06/18 17:50, Tvrtko Ursulin wrote:
> > From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> >
> > A minimal hack to parse the new tracepoint format and invent new "ring
> > ids" based on engine class and instance.
> >
> > Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> > Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
> > ---
> >   overlay/gpu-perf.c | 36 ++++++++++++++++++++++++++++--------
> >   1 file changed, 28 insertions(+), 8 deletions(-)
> >
> > diff --git a/overlay/gpu-perf.c b/overlay/gpu-perf.c
> > index ea3480050ab9..e77125672088 100644
> > --- a/overlay/gpu-perf.c
> > +++ b/overlay/gpu-perf.c
> > @@ -85,7 +85,8 @@ struct tracepoint {
> >   
> >       int device_field;
> >       int ctx_field;
> > -     int ring_field;
> > +     int class_field;
> > +     int instance_field;
> >       int seqno_field;
> >       int global_seqno_field;
> >       int plane_field;
> > @@ -151,8 +152,10 @@ tracepoint_id(int tp_id)
> >                               tp->device_field = f;
> >                       } else if (!strcmp(tp->fields[f].name, "ctx")) {
> >                               tp->ctx_field = f;
> > -                     } else if (!strcmp(tp->fields[f].name, "ring")) {
> > -                             tp->ring_field = f;
> > +                     } else if (!strcmp(tp->fields[f].name, "class")) {
> > +                             tp->class_field = f;
> > +                     } else if (!strcmp(tp->fields[f].name, "instance")) {
> > +                             tp->instance_field = f;
> 
> That looks good to me. We only support the most recent kernel?

Yes. It's a devtool using a dev interface. Its sole purpose is for
debugging the current kernel, or userspace in conjunction with drm-tip.
It's a temporary hack...

> >                       } else if (!strcmp(tp->fields[f].name, "seqno")) {
> >                               tp->seqno_field = f;
> >                       } else if (!strcmp(tp->fields[f].name, "global_seqno")) {
> > @@ -175,6 +178,23 @@ tracepoint_id(int tp_id)
> >                            tracepoints[tp_id].fields[                 \
> >                                    tracepoints[tp_id].field_name##_field].offset))
> >   
> > +#define READ_TP_FIELD_U16(sample, tp_id, field_name)                 \
> > +     (*(const uint16_t *)((sample)->tracepoint_data +                \
> > +                          tracepoints[tp_id].fields[                 \
> > +                                  tracepoints[tp_id].field_name##_field].offset))
> > +
> > +#define GET_RING_ID(sample, tp_id) \
> > +({ \
> > +     unsigned char class, instance, ring; \
> > +\
> > +     class = READ_TP_FIELD_U16(sample, tp_id, class); \
> > +     instance = READ_TP_FIELD_U16(sample, tp_id, instance); \
> > +\
> > +     ring = class * 2 + instance; \
> 
> Do you want to make it clear that we cannot have more than 2 instances 
> per class?

Or make it easier to spot and expand in future.
-Chris
diff mbox

Patch

diff --git a/overlay/gpu-perf.c b/overlay/gpu-perf.c
index ea3480050ab9..e77125672088 100644
--- a/overlay/gpu-perf.c
+++ b/overlay/gpu-perf.c
@@ -85,7 +85,8 @@  struct tracepoint {
 
 	int device_field;
 	int ctx_field;
-	int ring_field;
+	int class_field;
+	int instance_field;
 	int seqno_field;
 	int global_seqno_field;
 	int plane_field;
@@ -151,8 +152,10 @@  tracepoint_id(int tp_id)
 				tp->device_field = f;
 			} else if (!strcmp(tp->fields[f].name, "ctx")) {
 				tp->ctx_field = f;
-			} else if (!strcmp(tp->fields[f].name, "ring")) {
-				tp->ring_field = f;
+			} else if (!strcmp(tp->fields[f].name, "class")) {
+				tp->class_field = f;
+			} else if (!strcmp(tp->fields[f].name, "instance")) {
+				tp->instance_field = f;
 			} else if (!strcmp(tp->fields[f].name, "seqno")) {
 				tp->seqno_field = f;
 			} else if (!strcmp(tp->fields[f].name, "global_seqno")) {
@@ -175,6 +178,23 @@  tracepoint_id(int tp_id)
 			     tracepoints[tp_id].fields[			\
 				     tracepoints[tp_id].field_name##_field].offset))
 
+#define READ_TP_FIELD_U16(sample, tp_id, field_name)			\
+	(*(const uint16_t *)((sample)->tracepoint_data +		\
+			     tracepoints[tp_id].fields[			\
+				     tracepoints[tp_id].field_name##_field].offset))
+
+#define GET_RING_ID(sample, tp_id) \
+({ \
+	unsigned char class, instance, ring; \
+/* Build a synthetic ring index from the sample's class and instance fields (16-bit reads, stored in unsigned char locals). */\
+	class = READ_TP_FIELD_U16(sample, tp_id, class); \
+	instance = READ_TP_FIELD_U16(sample, tp_id, instance); \
+/* class * 2 + instance: distinct (class, instance) pairs map to distinct ids only while instance < 2. */\
+	ring = class * 2 + instance; \
+/* The statement expression evaluates to ring. */\
+	ring; \
+})
+
 static int perf_tracepoint_open(struct gpu_perf *gp, int tp_id,
 				int (*func)(struct gpu_perf *, const void *))
 {
@@ -313,7 +333,7 @@  static int request_add(struct gpu_perf *gp, const void *event)
 	if (comm == NULL)
 		return 0;
 
-	comm->nr_requests[READ_TP_FIELD_U32(sample, TP_GEM_REQUEST_ADD, ring)]++;
+	comm->nr_requests[GET_RING_ID(sample, TP_GEM_REQUEST_ADD)]++;
 	return 1;
 }
 
@@ -329,7 +349,7 @@  static int ctx_switch(struct gpu_perf *gp, const void *event)
 {
 	const struct sample_event *sample = event;
 
-	gp->ctx_switch[READ_TP_FIELD_U32(sample, TP_GEM_RING_SWITCH_CONTEXT, ring)]++;
+	gp->ctx_switch[GET_RING_ID(sample, TP_GEM_RING_SWITCH_CONTEXT)]++;
 	return 1;
 }
 
@@ -367,8 +387,8 @@  static int wait_begin(struct gpu_perf *gp, const void *event)
 	wait->context = READ_TP_FIELD_U32(sample, TP_GEM_REQUEST_WAIT_BEGIN, ctx);
 	wait->seqno = READ_TP_FIELD_U32(sample, TP_GEM_REQUEST_WAIT_BEGIN, seqno);
 	wait->time = sample->time;
-	wait->next = gp->wait[READ_TP_FIELD_U32(sample, TP_GEM_REQUEST_WAIT_BEGIN, ring)];
-	gp->wait[READ_TP_FIELD_U32(sample, TP_GEM_REQUEST_WAIT_BEGIN, ring)] = wait;
+	wait->next = gp->wait[GET_RING_ID(sample, TP_GEM_REQUEST_WAIT_BEGIN)];
+	gp->wait[GET_RING_ID(sample, TP_GEM_REQUEST_WAIT_BEGIN)] = wait;
 
 	return 0;
 }
@@ -377,7 +397,7 @@  static int wait_end(struct gpu_perf *gp, const void *event)
 {
 	const struct sample_event *sample = event;
 	struct gpu_perf_time *wait, **prev;
-	uint32_t engine = READ_TP_FIELD_U32(sample, TP_GEM_REQUEST_WAIT_END, ring);
+	uint32_t engine = GET_RING_ID(sample, TP_GEM_REQUEST_WAIT_END);
 	uint32_t context = READ_TP_FIELD_U32(sample, TP_GEM_REQUEST_WAIT_END, ctx);
 	uint32_t seqno = READ_TP_FIELD_U32(sample, TP_GEM_REQUEST_WAIT_END, seqno);