Message ID | 20180605165051.29136-2-tvrtko.ursulin@linux.intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On 05/06/18 17:50, Tvrtko Ursulin wrote: > From: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > > A minimal hack to parse the new tracepoint format and invent new "ring > id's" based on engine class and instance. > > Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com> > --- > overlay/gpu-perf.c | 36 ++++++++++++++++++++++++++++-------- > 1 file changed, 28 insertions(+), 8 deletions(-) > > diff --git a/overlay/gpu-perf.c b/overlay/gpu-perf.c > index ea3480050ab9..e77125672088 100644 > --- a/overlay/gpu-perf.c > +++ b/overlay/gpu-perf.c > @@ -85,7 +85,8 @@ struct tracepoint { > > int device_field; > int ctx_field; > - int ring_field; > + int class_field; > + int instance_field; > int seqno_field; > int global_seqno_field; > int plane_field; > @@ -151,8 +152,10 @@ tracepoint_id(int tp_id) > tp->device_field = f; > } else if (!strcmp(tp->fields[f].name, "ctx")) { > tp->ctx_field = f; > - } else if (!strcmp(tp->fields[f].name, "ring")) { > - tp->ring_field = f; > + } else if (!strcmp(tp->fields[f].name, "class")) { > + tp->class_field = f; > + } else if (!strcmp(tp->fields[f].name, "instance")) { > + tp->instance_field = f; That looks good to me. We only support the most recent kernel? 
> } else if (!strcmp(tp->fields[f].name, "seqno")) { > tp->seqno_field = f; > } else if (!strcmp(tp->fields[f].name, "global_seqno")) { > @@ -175,6 +178,23 @@ tracepoint_id(int tp_id) > tracepoints[tp_id].fields[ \ > tracepoints[tp_id].field_name##_field].offset)) > > +#define READ_TP_FIELD_U16(sample, tp_id, field_name) \ > + (*(const uint16_t *)((sample)->tracepoint_data + \ > + tracepoints[tp_id].fields[ \ > + tracepoints[tp_id].field_name##_field].offset)) > + > +#define GET_RING_ID(sample, tp_id) \ > +({ \ > + unsigned char class, instance, ring; \ > +\ > + class = READ_TP_FIELD_U16(sample, tp_id, class); \ > + instance = READ_TP_FIELD_U16(sample, tp_id, instance); \ > +\ > + ring = class * 2 + instance; \ Do you want to make it clear that we cannot have more than 2 instances per class? > +\ > + ring; \ > +}) > + > static int perf_tracepoint_open(struct gpu_perf *gp, int tp_id, > int (*func)(struct gpu_perf *, const void *)) > { > @@ -313,7 +333,7 @@ static int request_add(struct gpu_perf *gp, const void *event) > if (comm == NULL) > return 0; > > - comm->nr_requests[READ_TP_FIELD_U32(sample, TP_GEM_REQUEST_ADD, ring)]++; > + comm->nr_requests[GET_RING_ID(sample, TP_GEM_REQUEST_ADD)]++; > return 1; > } > > @@ -329,7 +349,7 @@ static int ctx_switch(struct gpu_perf *gp, const void *event) > { > const struct sample_event *sample = event; > > - gp->ctx_switch[READ_TP_FIELD_U32(sample, TP_GEM_RING_SWITCH_CONTEXT, ring)]++; > + gp->ctx_switch[GET_RING_ID(sample, TP_GEM_RING_SWITCH_CONTEXT)]++; > return 1; > } > > @@ -367,8 +387,8 @@ static int wait_begin(struct gpu_perf *gp, const void *event) > wait->context = READ_TP_FIELD_U32(sample, TP_GEM_REQUEST_WAIT_BEGIN, ctx); > wait->seqno = READ_TP_FIELD_U32(sample, TP_GEM_REQUEST_WAIT_BEGIN, seqno); > wait->time = sample->time; > - wait->next = gp->wait[READ_TP_FIELD_U32(sample, TP_GEM_REQUEST_WAIT_BEGIN, ring)]; > - gp->wait[READ_TP_FIELD_U32(sample, TP_GEM_REQUEST_WAIT_BEGIN, ring)] = wait; > + wait->next = 
gp->wait[GET_RING_ID(sample, TP_GEM_REQUEST_WAIT_BEGIN)]; > + gp->wait[GET_RING_ID(sample, TP_GEM_REQUEST_WAIT_BEGIN)] = wait; > > return 0; > } > @@ -377,7 +397,7 @@ static int wait_end(struct gpu_perf *gp, const void *event) > { > const struct sample_event *sample = event; > struct gpu_perf_time *wait, **prev; > - uint32_t engine = READ_TP_FIELD_U32(sample, TP_GEM_REQUEST_WAIT_END, ring); > + uint32_t engine = GET_RING_ID(sample, TP_GEM_REQUEST_WAIT_END); > uint32_t context = READ_TP_FIELD_U32(sample, TP_GEM_REQUEST_WAIT_END, ctx); > uint32_t seqno = READ_TP_FIELD_U32(sample, TP_GEM_REQUEST_WAIT_END, seqno); >
Quoting Lionel Landwerlin (2018-06-05 18:14:58) > On 05/06/18 17:50, Tvrtko Ursulin wrote: > > From: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > > > > A minimal hack to parse the new tracepoint format and invent new "ring > > id's" based on engine class and instance. > > > > Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > > Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com> > > --- > > overlay/gpu-perf.c | 36 ++++++++++++++++++++++++++++-------- > > 1 file changed, 28 insertions(+), 8 deletions(-) > > > > diff --git a/overlay/gpu-perf.c b/overlay/gpu-perf.c > > index ea3480050ab9..e77125672088 100644 > > --- a/overlay/gpu-perf.c > > +++ b/overlay/gpu-perf.c > > @@ -85,7 +85,8 @@ struct tracepoint { > > > > int device_field; > > int ctx_field; > > - int ring_field; > > + int class_field; > > + int instance_field; > > int seqno_field; > > int global_seqno_field; > > int plane_field; > > @@ -151,8 +152,10 @@ tracepoint_id(int tp_id) > > tp->device_field = f; > > } else if (!strcmp(tp->fields[f].name, "ctx")) { > > tp->ctx_field = f; > > - } else if (!strcmp(tp->fields[f].name, "ring")) { > > - tp->ring_field = f; > > + } else if (!strcmp(tp->fields[f].name, "class")) { > > + tp->class_field = f; > > + } else if (!strcmp(tp->fields[f].name, "instance")) { > > + tp->instance_field = f; > > That looks good to me. We only support the most recent kernel? Yes. It's a devtool using a dev interface. Its sole purpose is for debugging the current kernel, or userspace in conjunction with drm-tip. It's a temporary hack... 
> > } else if (!strcmp(tp->fields[f].name, "seqno")) { > > tp->seqno_field = f; > > } else if (!strcmp(tp->fields[f].name, "global_seqno")) { > > @@ -175,6 +178,23 @@ tracepoint_id(int tp_id) > > tracepoints[tp_id].fields[ \ > > tracepoints[tp_id].field_name##_field].offset)) > > > > +#define READ_TP_FIELD_U16(sample, tp_id, field_name) \ > > + (*(const uint16_t *)((sample)->tracepoint_data + \ > > + tracepoints[tp_id].fields[ \ > > + tracepoints[tp_id].field_name##_field].offset)) > > + > > +#define GET_RING_ID(sample, tp_id) \ > > +({ \ > > + unsigned char class, instance, ring; \ > > +\ > > + class = READ_TP_FIELD_U16(sample, tp_id, class); \ > > + instance = READ_TP_FIELD_U16(sample, tp_id, instance); \ > > +\ > > + ring = class * 2 + instance; \ > > Do you want to make it clear that we cannot have more than 2 instances > per class? Or make it easier to spot and expand in future. -Chris
diff --git a/overlay/gpu-perf.c b/overlay/gpu-perf.c index ea3480050ab9..e77125672088 100644 --- a/overlay/gpu-perf.c +++ b/overlay/gpu-perf.c @@ -85,7 +85,8 @@ struct tracepoint { int device_field; int ctx_field; - int ring_field; + int class_field; + int instance_field; int seqno_field; int global_seqno_field; int plane_field; @@ -151,8 +152,10 @@ tracepoint_id(int tp_id) tp->device_field = f; } else if (!strcmp(tp->fields[f].name, "ctx")) { tp->ctx_field = f; - } else if (!strcmp(tp->fields[f].name, "ring")) { - tp->ring_field = f; + } else if (!strcmp(tp->fields[f].name, "class")) { + tp->class_field = f; + } else if (!strcmp(tp->fields[f].name, "instance")) { + tp->instance_field = f; } else if (!strcmp(tp->fields[f].name, "seqno")) { tp->seqno_field = f; } else if (!strcmp(tp->fields[f].name, "global_seqno")) { @@ -175,6 +178,23 @@ tracepoint_id(int tp_id) tracepoints[tp_id].fields[ \ tracepoints[tp_id].field_name##_field].offset)) +#define READ_TP_FIELD_U16(sample, tp_id, field_name) \ + (*(const uint16_t *)((sample)->tracepoint_data + \ + tracepoints[tp_id].fields[ \ + tracepoints[tp_id].field_name##_field].offset)) + +#define GET_RING_ID(sample, tp_id) \ +({ \ + unsigned char class, instance, ring; \ +\ + class = READ_TP_FIELD_U16(sample, tp_id, class); \ + instance = READ_TP_FIELD_U16(sample, tp_id, instance); \ +\ + ring = class * 2 + instance; \ +\ + ring; \ +}) + static int perf_tracepoint_open(struct gpu_perf *gp, int tp_id, int (*func)(struct gpu_perf *, const void *)) { @@ -313,7 +333,7 @@ static int request_add(struct gpu_perf *gp, const void *event) if (comm == NULL) return 0; - comm->nr_requests[READ_TP_FIELD_U32(sample, TP_GEM_REQUEST_ADD, ring)]++; + comm->nr_requests[GET_RING_ID(sample, TP_GEM_REQUEST_ADD)]++; return 1; } @@ -329,7 +349,7 @@ static int ctx_switch(struct gpu_perf *gp, const void *event) { const struct sample_event *sample = event; - gp->ctx_switch[READ_TP_FIELD_U32(sample, TP_GEM_RING_SWITCH_CONTEXT, ring)]++; + 
gp->ctx_switch[GET_RING_ID(sample, TP_GEM_RING_SWITCH_CONTEXT)]++; return 1; } @@ -367,8 +387,8 @@ static int wait_begin(struct gpu_perf *gp, const void *event) wait->context = READ_TP_FIELD_U32(sample, TP_GEM_REQUEST_WAIT_BEGIN, ctx); wait->seqno = READ_TP_FIELD_U32(sample, TP_GEM_REQUEST_WAIT_BEGIN, seqno); wait->time = sample->time; - wait->next = gp->wait[READ_TP_FIELD_U32(sample, TP_GEM_REQUEST_WAIT_BEGIN, ring)]; - gp->wait[READ_TP_FIELD_U32(sample, TP_GEM_REQUEST_WAIT_BEGIN, ring)] = wait; + wait->next = gp->wait[GET_RING_ID(sample, TP_GEM_REQUEST_WAIT_BEGIN)]; + gp->wait[GET_RING_ID(sample, TP_GEM_REQUEST_WAIT_BEGIN)] = wait; return 0; } @@ -377,7 +397,7 @@ static int wait_end(struct gpu_perf *gp, const void *event) { const struct sample_event *sample = event; struct gpu_perf_time *wait, **prev; - uint32_t engine = READ_TP_FIELD_U32(sample, TP_GEM_REQUEST_WAIT_END, ring); + uint32_t engine = GET_RING_ID(sample, TP_GEM_REQUEST_WAIT_END); uint32_t context = READ_TP_FIELD_U32(sample, TP_GEM_REQUEST_WAIT_END, ctx); uint32_t seqno = READ_TP_FIELD_U32(sample, TP_GEM_REQUEST_WAIT_END, seqno);