Message ID | 20180606090233.6646-1-tvrtko.ursulin@linux.intel.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
On 06/06/18 10:02, Tvrtko Ursulin wrote: > From: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > > A minimal hack to parse the new tracepoint format and invent new "ring > ids" based on engine class and instance. > > v2: > * Make it a bit more future proof. (Lionel, Chris) > * Some assorted fixups to show forgotten engines. > > Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> > Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com> > --- > overlay/gpu-perf.c | 40 ++++++++++++++++++++++++++++++++-------- > overlay/overlay.c | 17 +++++++++-------- > 2 files changed, 41 insertions(+), 16 deletions(-) > > diff --git a/overlay/gpu-perf.c b/overlay/gpu-perf.c > index ea3480050ab9..5629f826765e 100644 > --- a/overlay/gpu-perf.c > +++ b/overlay/gpu-perf.c > @@ -33,6 +33,7 @@ > #include <string.h> > #include <fcntl.h> > #include <errno.h> > +#include <assert.h> > > #include "igt_perf.h" > > @@ -85,7 +86,8 @@ struct tracepoint { > > int device_field; > int ctx_field; > - int ring_field; > + int class_field; > + int instance_field; > int seqno_field; > int global_seqno_field; > int plane_field; > @@ -151,8 +153,10 @@ tracepoint_id(int tp_id) > tp->device_field = f; > } else if (!strcmp(tp->fields[f].name, "ctx")) { > tp->ctx_field = f; > - } else if (!strcmp(tp->fields[f].name, "ring")) { > - tp->ring_field = f; > + } else if (!strcmp(tp->fields[f].name, "class")) { > + tp->class_field = f; > + } else if (!strcmp(tp->fields[f].name, "instance")) { > + tp->instance_field = f; > } else if (!strcmp(tp->fields[f].name, "seqno")) { > tp->seqno_field = f; > } else if (!strcmp(tp->fields[f].name, "global_seqno")) { > @@ -175,6 +179,26 @@ tracepoint_id(int tp_id) > tracepoints[tp_id].fields[ \ > tracepoints[tp_id].field_name##_field].offset)) > > +#define READ_TP_FIELD_U16(sample, tp_id, field_name) \ > + (*(const uint16_t *)((sample)->tracepoint_data + \ > + tracepoints[tp_id].fields[ \ > + tracepoints[tp_id].field_name##_field].offset)) > + > +#define GET_RING_ID(sample, tp_id) 
\ > +({ \ > + unsigned char class, instance, ring; \ > +\ > + class = READ_TP_FIELD_U16(sample, tp_id, class); \ > + instance = READ_TP_FIELD_U16(sample, tp_id, instance); \ > +\ > + assert(class <= I915_ENGINE_CLASS_VIDEO_ENHANCE); \ > + assert(instance <= 4); \ > +\ > + ring = class * 4 + instance; \ > +\ > + ring; \ > +}) > + > static int perf_tracepoint_open(struct gpu_perf *gp, int tp_id, > int (*func)(struct gpu_perf *, const void *)) > { > @@ -313,7 +337,7 @@ static int request_add(struct gpu_perf *gp, const void *event) > if (comm == NULL) > return 0; > > - comm->nr_requests[READ_TP_FIELD_U32(sample, TP_GEM_REQUEST_ADD, ring)]++; > + comm->nr_requests[GET_RING_ID(sample, TP_GEM_REQUEST_ADD)]++; > return 1; > } > > @@ -329,7 +353,7 @@ static int ctx_switch(struct gpu_perf *gp, const void *event) > { > const struct sample_event *sample = event; > > - gp->ctx_switch[READ_TP_FIELD_U32(sample, TP_GEM_RING_SWITCH_CONTEXT, ring)]++; > + gp->ctx_switch[GET_RING_ID(sample, TP_GEM_RING_SWITCH_CONTEXT)]++; > return 1; > } > > @@ -367,8 +391,8 @@ static int wait_begin(struct gpu_perf *gp, const void *event) > wait->context = READ_TP_FIELD_U32(sample, TP_GEM_REQUEST_WAIT_BEGIN, ctx); > wait->seqno = READ_TP_FIELD_U32(sample, TP_GEM_REQUEST_WAIT_BEGIN, seqno); > wait->time = sample->time; > - wait->next = gp->wait[READ_TP_FIELD_U32(sample, TP_GEM_REQUEST_WAIT_BEGIN, ring)]; > - gp->wait[READ_TP_FIELD_U32(sample, TP_GEM_REQUEST_WAIT_BEGIN, ring)] = wait; > + wait->next = gp->wait[GET_RING_ID(sample, TP_GEM_REQUEST_WAIT_BEGIN)]; > + gp->wait[GET_RING_ID(sample, TP_GEM_REQUEST_WAIT_BEGIN)] = wait; > > return 0; > } > @@ -377,7 +401,7 @@ static int wait_end(struct gpu_perf *gp, const void *event) > { > const struct sample_event *sample = event; > struct gpu_perf_time *wait, **prev; > - uint32_t engine = READ_TP_FIELD_U32(sample, TP_GEM_REQUEST_WAIT_END, ring); > + uint32_t engine = GET_RING_ID(sample, TP_GEM_REQUEST_WAIT_END); > uint32_t context = READ_TP_FIELD_U32(sample, 
TP_GEM_REQUEST_WAIT_END, ctx); > uint32_t seqno = READ_TP_FIELD_U32(sample, TP_GEM_REQUEST_WAIT_END, seqno); > > diff --git a/overlay/overlay.c b/overlay/overlay.c > index 545af7bcb2f5..eae5ddfa8823 100644 > --- a/overlay/overlay.c > +++ b/overlay/overlay.c > @@ -148,6 +148,7 @@ static void init_gpu_top(struct overlay_context *ctx, > { 0.25, 1, 0.25, 1 }, > { 0.25, 0.25, 1, 1 }, > { 1, 1, 1, 1 }, > + { 1, 1, 0.25, 1 }, > }; > int n; > > @@ -311,11 +312,11 @@ static void show_gpu_perf(struct overlay_context *ctx, struct overlay_gpu_perf * > { 1, 1, 1, 1 }, > }; > struct gpu_perf_comm *comm, **prev; > - const char *ring_name[] = { > - "R", > - "B", > - "V0", > - "V1", > + const char *ring_name[MAX_RINGS] = { > + "R", "?", "?", "?", > + "B", "?", "?", "?", > + "V0", "V1", "?", "?", > + "VE0", "?", "?", "?", > }; I guess if you defined a MAX_ENGINE_INSTANCES somewhere, you could just have an array of class names and then generate ring_name off the n variable further below. class = n / MAX_ENGINE_INSTANCES instance = n % MAX_ENGINE_INSTANCES snprintf(ring_name, sizeof(ring_name), "%s%i", class_names[class], instance); Just a suggestion, regardless : Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> > double range[2]; > char buf[1024]; > @@ -326,7 +327,7 @@ static void show_gpu_perf(struct overlay_context *ctx, struct overlay_gpu_perf * > > gpu_perf_update(&gp->gpu_perf); > > - for (n = 0; n < 4; n++) { > + for (n = 0; n < MAX_RINGS; n++) { > if (gp->gpu_perf.ctx_switch[n]) > has_ctx = n + 1; > if (gp->gpu_perf.flip_complete[n]) > @@ -389,7 +390,7 @@ static void show_gpu_perf(struct overlay_context *ctx, struct overlay_gpu_perf * > } > > total = 0; > - for (n = 0; n < 3; n++) > + for (n = 0; n < MAX_RINGS; n++) > total += comm->nr_requests[n]; > chart_add_sample(comm->user_data, total); > } > @@ -433,7 +434,7 @@ static void show_gpu_perf(struct overlay_context *ctx, struct overlay_gpu_perf * > goto skip_comm; > > len = sprintf(buf, "%s:", comm->name); > - 
for (n = 0; n < sizeof(ring_name)/sizeof(ring_name[0]); n++) { > + for (n = 0; n < MAX_RINGS; n++) { > if (comm->nr_requests[n] == 0) > continue; > len += sprintf(buf + len, "%s %d%s", need_comma ? "," : "", comm->nr_requests[n], ring_name[n]);
On 06/06/2018 11:29, Lionel Landwerlin wrote: > On 06/06/18 10:02, Tvrtko Ursulin wrote: >> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com> >> >> A minimal hack to parse the new tracepoint format and invent new "ring >> ids" based on engine class and instance. >> >> v2: >> * Make it a bit more future proof. (Lionel, Chris) >> * Some assorted fixups to show forgotten engines. >> >> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> >> Cc: Lionel Landwerlin <lionel.g.landwerlin@intel.com> >> --- >> overlay/gpu-perf.c | 40 ++++++++++++++++++++++++++++++++-------- >> overlay/overlay.c | 17 +++++++++-------- >> 2 files changed, 41 insertions(+), 16 deletions(-) >> >> diff --git a/overlay/gpu-perf.c b/overlay/gpu-perf.c >> index ea3480050ab9..5629f826765e 100644 >> --- a/overlay/gpu-perf.c >> +++ b/overlay/gpu-perf.c >> @@ -33,6 +33,7 @@ >> #include <string.h> >> #include <fcntl.h> >> #include <errno.h> >> +#include <assert.h> >> #include "igt_perf.h" >> @@ -85,7 +86,8 @@ struct tracepoint { >> int device_field; >> int ctx_field; >> - int ring_field; >> + int class_field; >> + int instance_field; >> int seqno_field; >> int global_seqno_field; >> int plane_field; >> @@ -151,8 +153,10 @@ tracepoint_id(int tp_id) >> tp->device_field = f; >> } else if (!strcmp(tp->fields[f].name, "ctx")) { >> tp->ctx_field = f; >> - } else if (!strcmp(tp->fields[f].name, "ring")) { >> - tp->ring_field = f; >> + } else if (!strcmp(tp->fields[f].name, "class")) { >> + tp->class_field = f; >> + } else if (!strcmp(tp->fields[f].name, "instance")) { >> + tp->instance_field = f; >> } else if (!strcmp(tp->fields[f].name, "seqno")) { >> tp->seqno_field = f; >> } else if (!strcmp(tp->fields[f].name, "global_seqno")) { >> @@ -175,6 +179,26 @@ tracepoint_id(int tp_id) >> tracepoints[tp_id].fields[ \ >> tracepoints[tp_id].field_name##_field].offset)) >> +#define READ_TP_FIELD_U16(sample, tp_id, field_name) \ >> + (*(const uint16_t *)((sample)->tracepoint_data + \ >> + tracepoints[tp_id].fields[ \ 
>> + tracepoints[tp_id].field_name##_field].offset)) >> + >> +#define GET_RING_ID(sample, tp_id) \ >> +({ \ >> + unsigned char class, instance, ring; \ >> +\ >> + class = READ_TP_FIELD_U16(sample, tp_id, class); \ >> + instance = READ_TP_FIELD_U16(sample, tp_id, instance); \ >> +\ >> + assert(class <= I915_ENGINE_CLASS_VIDEO_ENHANCE); \ >> + assert(instance <= 4); \ >> +\ >> + ring = class * 4 + instance; \ >> +\ >> + ring; \ >> +}) >> + >> static int perf_tracepoint_open(struct gpu_perf *gp, int tp_id, >> int (*func)(struct gpu_perf *, const void *)) >> { >> @@ -313,7 +337,7 @@ static int request_add(struct gpu_perf *gp, const >> void *event) >> if (comm == NULL) >> return 0; >> - comm->nr_requests[READ_TP_FIELD_U32(sample, TP_GEM_REQUEST_ADD, >> ring)]++; >> + comm->nr_requests[GET_RING_ID(sample, TP_GEM_REQUEST_ADD)]++; >> return 1; >> } >> @@ -329,7 +353,7 @@ static int ctx_switch(struct gpu_perf *gp, const >> void *event) >> { >> const struct sample_event *sample = event; >> - gp->ctx_switch[READ_TP_FIELD_U32(sample, >> TP_GEM_RING_SWITCH_CONTEXT, ring)]++; >> + gp->ctx_switch[GET_RING_ID(sample, TP_GEM_RING_SWITCH_CONTEXT)]++; >> return 1; >> } >> @@ -367,8 +391,8 @@ static int wait_begin(struct gpu_perf *gp, const >> void *event) >> wait->context = READ_TP_FIELD_U32(sample, >> TP_GEM_REQUEST_WAIT_BEGIN, ctx); >> wait->seqno = READ_TP_FIELD_U32(sample, >> TP_GEM_REQUEST_WAIT_BEGIN, seqno); >> wait->time = sample->time; >> - wait->next = gp->wait[READ_TP_FIELD_U32(sample, >> TP_GEM_REQUEST_WAIT_BEGIN, ring)]; >> - gp->wait[READ_TP_FIELD_U32(sample, TP_GEM_REQUEST_WAIT_BEGIN, >> ring)] = wait; >> + wait->next = gp->wait[GET_RING_ID(sample, >> TP_GEM_REQUEST_WAIT_BEGIN)]; >> + gp->wait[GET_RING_ID(sample, TP_GEM_REQUEST_WAIT_BEGIN)] = wait; >> return 0; >> } >> @@ -377,7 +401,7 @@ static int wait_end(struct gpu_perf *gp, const >> void *event) >> { >> const struct sample_event *sample = event; >> struct gpu_perf_time *wait, **prev; >> - uint32_t engine = 
READ_TP_FIELD_U32(sample, >> TP_GEM_REQUEST_WAIT_END, ring); >> + uint32_t engine = GET_RING_ID(sample, TP_GEM_REQUEST_WAIT_END); >> uint32_t context = READ_TP_FIELD_U32(sample, >> TP_GEM_REQUEST_WAIT_END, ctx); >> uint32_t seqno = READ_TP_FIELD_U32(sample, >> TP_GEM_REQUEST_WAIT_END, seqno); >> diff --git a/overlay/overlay.c b/overlay/overlay.c >> index 545af7bcb2f5..eae5ddfa8823 100644 >> --- a/overlay/overlay.c >> +++ b/overlay/overlay.c >> @@ -148,6 +148,7 @@ static void init_gpu_top(struct overlay_context *ctx, >> { 0.25, 1, 0.25, 1 }, >> { 0.25, 0.25, 1, 1 }, >> { 1, 1, 1, 1 }, >> + { 1, 1, 0.25, 1 }, >> }; >> int n; >> @@ -311,11 +312,11 @@ static void show_gpu_perf(struct overlay_context >> *ctx, struct overlay_gpu_perf * >> { 1, 1, 1, 1 }, >> }; >> struct gpu_perf_comm *comm, **prev; >> - const char *ring_name[] = { >> - "R", >> - "B", >> - "V0", >> - "V1", >> + const char *ring_name[MAX_RINGS] = { >> + "R", "?", "?", "?", >> + "B", "?", "?", "?", >> + "V0", "V1", "?", "?", >> + "VE0", "?", "?", "?", >> }; > > I guess if you defined a MAX_ENGINE_INSTANCES somewhere, you could just > have an array of class names and then generate ring_name off the n > variable further below. > > class = n / MAX_ENGINE_INSTANCES > instance = n % MAX_ENGINE_INSTANCES > > snprintf(ring_name, sizeof(ring_name), "%s%i", class_names[class], > instance); > > > Just a suggestion, regardless : > > Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Thanks! The above is a completely reasonable suggestion, but I have pushed this as is for now, just so the tool doesn't segfault. I suspect there are more issues lurking in there if it is to be made really future-proof. Regards, Tvrtko
diff --git a/overlay/gpu-perf.c b/overlay/gpu-perf.c index ea3480050ab9..5629f826765e 100644 --- a/overlay/gpu-perf.c +++ b/overlay/gpu-perf.c @@ -33,6 +33,7 @@ #include <string.h> #include <fcntl.h> #include <errno.h> +#include <assert.h> #include "igt_perf.h" @@ -85,7 +86,8 @@ struct tracepoint { int device_field; int ctx_field; - int ring_field; + int class_field; + int instance_field; int seqno_field; int global_seqno_field; int plane_field; @@ -151,8 +153,10 @@ tracepoint_id(int tp_id) tp->device_field = f; } else if (!strcmp(tp->fields[f].name, "ctx")) { tp->ctx_field = f; - } else if (!strcmp(tp->fields[f].name, "ring")) { - tp->ring_field = f; + } else if (!strcmp(tp->fields[f].name, "class")) { + tp->class_field = f; + } else if (!strcmp(tp->fields[f].name, "instance")) { + tp->instance_field = f; } else if (!strcmp(tp->fields[f].name, "seqno")) { tp->seqno_field = f; } else if (!strcmp(tp->fields[f].name, "global_seqno")) { @@ -175,6 +179,26 @@ tracepoint_id(int tp_id) tracepoints[tp_id].fields[ \ tracepoints[tp_id].field_name##_field].offset)) +#define READ_TP_FIELD_U16(sample, tp_id, field_name) \ + (*(const uint16_t *)((sample)->tracepoint_data + \ + tracepoints[tp_id].fields[ \ + tracepoints[tp_id].field_name##_field].offset)) + +#define GET_RING_ID(sample, tp_id) \ +({ \ + unsigned char class, instance, ring; \ +\ + class = READ_TP_FIELD_U16(sample, tp_id, class); \ + instance = READ_TP_FIELD_U16(sample, tp_id, instance); \ +\ + assert(class <= I915_ENGINE_CLASS_VIDEO_ENHANCE); \ + assert(instance <= 4); \ +\ + ring = class * 4 + instance; \ +\ + ring; \ +}) + static int perf_tracepoint_open(struct gpu_perf *gp, int tp_id, int (*func)(struct gpu_perf *, const void *)) { @@ -313,7 +337,7 @@ static int request_add(struct gpu_perf *gp, const void *event) if (comm == NULL) return 0; - comm->nr_requests[READ_TP_FIELD_U32(sample, TP_GEM_REQUEST_ADD, ring)]++; + comm->nr_requests[GET_RING_ID(sample, TP_GEM_REQUEST_ADD)]++; return 1; } @@ -329,7 +353,7 @@ 
static int ctx_switch(struct gpu_perf *gp, const void *event) { const struct sample_event *sample = event; - gp->ctx_switch[READ_TP_FIELD_U32(sample, TP_GEM_RING_SWITCH_CONTEXT, ring)]++; + gp->ctx_switch[GET_RING_ID(sample, TP_GEM_RING_SWITCH_CONTEXT)]++; return 1; } @@ -367,8 +391,8 @@ static int wait_begin(struct gpu_perf *gp, const void *event) wait->context = READ_TP_FIELD_U32(sample, TP_GEM_REQUEST_WAIT_BEGIN, ctx); wait->seqno = READ_TP_FIELD_U32(sample, TP_GEM_REQUEST_WAIT_BEGIN, seqno); wait->time = sample->time; - wait->next = gp->wait[READ_TP_FIELD_U32(sample, TP_GEM_REQUEST_WAIT_BEGIN, ring)]; - gp->wait[READ_TP_FIELD_U32(sample, TP_GEM_REQUEST_WAIT_BEGIN, ring)] = wait; + wait->next = gp->wait[GET_RING_ID(sample, TP_GEM_REQUEST_WAIT_BEGIN)]; + gp->wait[GET_RING_ID(sample, TP_GEM_REQUEST_WAIT_BEGIN)] = wait; return 0; } @@ -377,7 +401,7 @@ static int wait_end(struct gpu_perf *gp, const void *event) { const struct sample_event *sample = event; struct gpu_perf_time *wait, **prev; - uint32_t engine = READ_TP_FIELD_U32(sample, TP_GEM_REQUEST_WAIT_END, ring); + uint32_t engine = GET_RING_ID(sample, TP_GEM_REQUEST_WAIT_END); uint32_t context = READ_TP_FIELD_U32(sample, TP_GEM_REQUEST_WAIT_END, ctx); uint32_t seqno = READ_TP_FIELD_U32(sample, TP_GEM_REQUEST_WAIT_END, seqno); diff --git a/overlay/overlay.c b/overlay/overlay.c index 545af7bcb2f5..eae5ddfa8823 100644 --- a/overlay/overlay.c +++ b/overlay/overlay.c @@ -148,6 +148,7 @@ static void init_gpu_top(struct overlay_context *ctx, { 0.25, 1, 0.25, 1 }, { 0.25, 0.25, 1, 1 }, { 1, 1, 1, 1 }, + { 1, 1, 0.25, 1 }, }; int n; @@ -311,11 +312,11 @@ static void show_gpu_perf(struct overlay_context *ctx, struct overlay_gpu_perf * { 1, 1, 1, 1 }, }; struct gpu_perf_comm *comm, **prev; - const char *ring_name[] = { - "R", - "B", - "V0", - "V1", + const char *ring_name[MAX_RINGS] = { + "R", "?", "?", "?", + "B", "?", "?", "?", + "V0", "V1", "?", "?", + "VE0", "?", "?", "?", }; double range[2]; char buf[1024]; @@ -326,7 
+327,7 @@ static void show_gpu_perf(struct overlay_context *ctx, struct overlay_gpu_perf * gpu_perf_update(&gp->gpu_perf); - for (n = 0; n < 4; n++) { + for (n = 0; n < MAX_RINGS; n++) { if (gp->gpu_perf.ctx_switch[n]) has_ctx = n + 1; if (gp->gpu_perf.flip_complete[n]) @@ -389,7 +390,7 @@ static void show_gpu_perf(struct overlay_context *ctx, struct overlay_gpu_perf * } total = 0; - for (n = 0; n < 3; n++) + for (n = 0; n < MAX_RINGS; n++) total += comm->nr_requests[n]; chart_add_sample(comm->user_data, total); } @@ -433,7 +434,7 @@ static void show_gpu_perf(struct overlay_context *ctx, struct overlay_gpu_perf * goto skip_comm; len = sprintf(buf, "%s:", comm->name); - for (n = 0; n < sizeof(ring_name)/sizeof(ring_name[0]); n++) { + for (n = 0; n < MAX_RINGS; n++) { if (comm->nr_requests[n] == 0) continue; len += sprintf(buf + len, "%s %d%s", need_comma ? "," : "", comm->nr_requests[n], ring_name[n]);