Message ID | 20240606130629.214827-4-pierre-eric.pelloux-prayer@amd.com (mailing list archive) |
---|---|
State | New, archived |
Headers | show |
Series | Improve gpu_scheduler trace events | expand |
On Thu, 6 Jun 2024 15:06:24 +0200 Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com> wrote: > Print identifiers instead of pointers: > * "fence=%p" is replaced by "fence=(context:%llu, seqno:%lld)" to have a > coherent way to print the fence. A possible follow up change would be > to use the same format in traces/../dma-fence.h. > * "entity=%p" is removed because the fence's context is already an > identifier of the job owner. > > Signed-off-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com> > --- > .../gpu/drm/scheduler/gpu_scheduler_trace.h | 27 ++++++++++--------- > 1 file changed, 15 insertions(+), 12 deletions(-) > > diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h b/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h > index 0a19c121bda5..2d7f2252eb5d 100644 > --- a/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h > +++ b/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h > @@ -58,9 +58,9 @@ DECLARE_EVENT_CLASS(drm_sched_job, > &sched_job->sched->credit_count); > __entry->dev_index = sched_job->sched->dev->primary->index; > ), > - TP_printk("entity=%p, id=%llu, fence=%p, ring=%s, job count:%u, hw job count:%d", > - __entry->entity, __entry->id, > - __entry->fence, __get_str(name), > + TP_printk("id=%llu, fence=(context:%llu, seqno:%lld), ring=%s, job count:%u, hw job count:%d", > + __entry->id, > + __entry->fence->context, __entry->fence->seqno, __get_str(name), > __entry->job_count, __entry->hw_job_count) > ); > > @@ -72,9 +72,9 @@ DEFINE_EVENT(drm_sched_job, drm_sched_job, > DEFINE_EVENT_PRINT(drm_sched_job, drm_run_job, > TP_PROTO(struct drm_sched_job *sched_job, struct drm_sched_entity *entity), > TP_ARGS(sched_job, entity), > - TP_printk("dev_index=%d entity=%p id=%llu, fence=%p, ring=%s, job count:%u, hw job count:%d", > - __entry->dev_index, __entry->entity, __entry->id, > - __entry->fence, __get_str(name), > + TP_printk("dev_index=%d id=%llu, fence=(context:%llu, seqno:%lld), ring=%s, job count:%u, hw job count:%d", > + __entry->dev_index, __entry->id, > + __entry->fence->context, __entry->fence->seqno, __get_str(name), > __entry->job_count, __entry->hw_job_count) NACK! You can't dereference pointers from TP_printk(). This is called seconds, minutes, hours, even days or months after that pointer was assigned. How do you know that pointer still points to anything? -- Steve > ); > > @@ -88,7 +88,8 @@ TRACE_EVENT(drm_sched_process_job, > TP_fast_assign( > __entry->fence = &fence->finished; > ), > - TP_printk("fence=%p signaled", __entry->fence) > + TP_printk("fence=(context:%llu, seqno:%lld) signaled", > + __entry->fence->context, __entry->fence->seqno) > ); > > TRACE_EVENT(drm_sched_job_wait_dep, > @@ -96,23 +97,25 @@ TRACE_EVENT(drm_sched_job_wait_dep, > TP_ARGS(sched_job, fence), > TP_STRUCT__entry( > __string(name, sched_job->sched->name) > + __field(uint64_t, fence_context) > __field(uint64_t, id) > __field(struct dma_fence *, fence) > __field(uint64_t, ctx) > - __field(unsigned, seqno) > + __field(uint64_t, seqno) > ), > > TP_fast_assign( > __assign_str(name); > + /* Store the hw exec fence context. */ > + __entry->fence_context = sched_job->entity->fence_context + 1; > __entry->id = sched_job->id; > __entry->fence = fence; > __entry->ctx = fence->context; > __entry->seqno = fence->seqno; > ), > - TP_printk("job ring=%s, id=%llu, depends fence=%p, context=%llu, seq=%u", > - __get_str(name), __entry->id, > - __entry->fence, __entry->ctx, > - __entry->seqno) > + TP_printk("job ring=%s, fence_context=%llu, id=%llu, depends fence=(context:%llu, seqno:%lld)", > + __get_str(name), __entry->fence_context, __entry->id, > + __entry->ctx, __entry->seqno) > ); > > #endif
Am 06.06.24 um 15:19 schrieb Steven Rostedt: > On Thu, 6 Jun 2024 15:06:24 +0200 > Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com> wrote: > >> Print identifiers instead of pointers: >> * "fence=%p" is replaced by "fence=(context:%llu, seqno:%lld)" to have a >> coherent way to print the fence. A possible follow up change would be >> to use the same format in traces/../dma-fence.h. >> * "entity=%p" is removed because the fence's context is already an >> identifier of the job owner. >> >> Signed-off-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com> >> --- >> .../gpu/drm/scheduler/gpu_scheduler_trace.h | 27 ++++++++++--------- >> 1 file changed, 15 insertions(+), 12 deletions(-) >> >> diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h b/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h >> index 0a19c121bda5..2d7f2252eb5d 100644 >> --- a/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h >> +++ b/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h >> @@ -58,9 +58,9 @@ DECLARE_EVENT_CLASS(drm_sched_job, >> &sched_job->sched->credit_count); >> __entry->dev_index = sched_job->sched->dev->primary->index; >> ), >> - TP_printk("entity=%p, id=%llu, fence=%p, ring=%s, job count:%u, hw job count:%d", >> - __entry->entity, __entry->id, >> - __entry->fence, __get_str(name), >> + TP_printk("id=%llu, fence=(context:%llu, seqno:%lld), ring=%s, job count:%u, hw job count:%d", >> + __entry->id, >> + __entry->fence->context, __entry->fence->seqno, __get_str(name), >> __entry->job_count, __entry->hw_job_count) >> ); >> >> @@ -72,9 +72,9 @@ DEFINE_EVENT(drm_sched_job, drm_sched_job, >> DEFINE_EVENT_PRINT(drm_sched_job, drm_run_job, >> TP_PROTO(struct drm_sched_job *sched_job, struct drm_sched_entity *entity), >> TP_ARGS(sched_job, entity), >> - TP_printk("dev_index=%d entity=%p id=%llu, fence=%p, ring=%s, job count:%u, hw job count:%d", >> - __entry->dev_index, __entry->entity, __entry->id, >> - __entry->fence, __get_str(name), >> + TP_printk("dev_index=%d id=%llu, fence=(context:%llu, seqno:%lld), ring=%s, job count:%u, hw job count:%d", >> + __entry->dev_index, __entry->id, >> + __entry->fence->context, __entry->fence->seqno, __get_str(name), >> __entry->job_count, __entry->hw_job_count) > NACK! > > You can't dereference pointers from TP_printk(). This is called seconds, > minutes, hours, even days or months after that pointer was assigned. How do > you know that pointer still points to anything? Yeah, just wanted to reply the same thing. That is a really really bad idea. You could in theory grab a reference to the fence, but we usually try to avoid that as well since it prevents modules from unloading. Rather move the context and seqno directly as values into the trace event. Christian. > > -- Steve > > >> ); >> >> @@ -88,7 +88,8 @@ TRACE_EVENT(drm_sched_process_job, >> TP_fast_assign( >> __entry->fence = &fence->finished; >> ), >> - TP_printk("fence=%p signaled", __entry->fence) >> + TP_printk("fence=(context:%llu, seqno:%lld) signaled", >> + __entry->fence->context, __entry->fence->seqno) >> ); >> >> TRACE_EVENT(drm_sched_job_wait_dep, >> @@ -96,23 +97,25 @@ TRACE_EVENT(drm_sched_job_wait_dep, >> TP_ARGS(sched_job, fence), >> TP_STRUCT__entry( >> __string(name, sched_job->sched->name) >> + __field(uint64_t, fence_context) >> __field(uint64_t, id) >> __field(struct dma_fence *, fence) >> __field(uint64_t, ctx) >> - __field(unsigned, seqno) >> + __field(uint64_t, seqno) >> ), >> >> TP_fast_assign( >> __assign_str(name); >> + /* Store the hw exec fence context. */ >> + __entry->fence_context = sched_job->entity->fence_context + 1; >> __entry->id = sched_job->id; >> __entry->fence = fence; >> __entry->ctx = fence->context; >> __entry->seqno = fence->seqno; >> ), >> - TP_printk("job ring=%s, id=%llu, depends fence=%p, context=%llu, seq=%u", >> - __get_str(name), __entry->id, >> - __entry->fence, __entry->ctx, >> - __entry->seqno) >> + TP_printk("job ring=%s, fence_context=%llu, id=%llu, depends fence=(context:%llu, seqno:%lld)", >> + __get_str(name), __entry->fence_context, __entry->id, >> + __entry->ctx, __entry->seqno) >> ); >> >> #endif
Hi, Le 06/06/2024 à 15:19, Steven Rostedt a écrit : > On Thu, 6 Jun 2024 15:06:24 +0200 > Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com> wrote: > >> Print identifiers instead of pointers: >> * "fence=%p" is replaced by "fence=(context:%llu, seqno:%lld)" to have a >> coherent way to print the fence. A possible follow up change would be >> to use the same format in traces/../dma-fence.h. >> * "entity=%p" is removed because the fence's context is already an >> identifier of the job owner. >> >> Signed-off-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com> >> --- >> .../gpu/drm/scheduler/gpu_scheduler_trace.h | 27 ++++++++++--------- >> 1 file changed, 15 insertions(+), 12 deletions(-) >> >> diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h b/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h >> index 0a19c121bda5..2d7f2252eb5d 100644 >> --- a/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h >> +++ b/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h >> @@ -58,9 +58,9 @@ DECLARE_EVENT_CLASS(drm_sched_job, >> &sched_job->sched->credit_count); >> __entry->dev_index = sched_job->sched->dev->primary->index; >> ), >> - TP_printk("entity=%p, id=%llu, fence=%p, ring=%s, job count:%u, hw job count:%d", >> - __entry->entity, __entry->id, >> - __entry->fence, __get_str(name), >> + TP_printk("id=%llu, fence=(context:%llu, seqno:%lld), ring=%s, job count:%u, hw job count:%d", >> + __entry->id, >> + __entry->fence->context, __entry->fence->seqno, __get_str(name), >> __entry->job_count, __entry->hw_job_count) >> ); >> >> @@ -72,9 +72,9 @@ DEFINE_EVENT(drm_sched_job, drm_sched_job, >> DEFINE_EVENT_PRINT(drm_sched_job, drm_run_job, >> TP_PROTO(struct drm_sched_job *sched_job, struct drm_sched_entity *entity), >> TP_ARGS(sched_job, entity), >> - TP_printk("dev_index=%d entity=%p id=%llu, fence=%p, ring=%s, job count:%u, hw job count:%d", >> - __entry->dev_index, __entry->entity, __entry->id, >> - __entry->fence, __get_str(name), >> + TP_printk("dev_index=%d id=%llu, fence=(context:%llu, seqno:%lld), ring=%s, job count:%u, hw job count:%d", >> + __entry->dev_index, __entry->id, >> + __entry->fence->context, __entry->fence->seqno, __get_str(name), >> __entry->job_count, __entry->hw_job_count) > > NACK! > > You can't dereference pointers from TP_printk(). This is called seconds, > minutes, hours, even days or months after that pointer was assigned. How do > you know that pointer still points to anything? Now that you pointed it out, the problem is obvious indeed. I have fixed it locally. Thanks! Pierre-Eric > > -- Steve > > >> ); >> >> @@ -88,7 +88,8 @@ TRACE_EVENT(drm_sched_process_job, >> TP_fast_assign( >> __entry->fence = &fence->finished; >> ), >> - TP_printk("fence=%p signaled", __entry->fence) >> + TP_printk("fence=(context:%llu, seqno:%lld) signaled", >> + __entry->fence->context, __entry->fence->seqno) >> ); >> >> TRACE_EVENT(drm_sched_job_wait_dep, >> @@ -96,23 +97,25 @@ TRACE_EVENT(drm_sched_job_wait_dep, >> TP_ARGS(sched_job, fence), >> TP_STRUCT__entry( >> __string(name, sched_job->sched->name) >> + __field(uint64_t, fence_context) >> __field(uint64_t, id) >> __field(struct dma_fence *, fence) >> __field(uint64_t, ctx) >> - __field(unsigned, seqno) >> + __field(uint64_t, seqno) >> ), >> >> TP_fast_assign( >> __assign_str(name); >> + /* Store the hw exec fence context. */ >> + __entry->fence_context = sched_job->entity->fence_context + 1; >> __entry->id = sched_job->id; >> __entry->fence = fence; >> __entry->ctx = fence->context; >> __entry->seqno = fence->seqno; >> ), >> - TP_printk("job ring=%s, id=%llu, depends fence=%p, context=%llu, seq=%u", >> - __get_str(name), __entry->id, >> - __entry->fence, __entry->ctx, >> - __entry->seqno) >> + TP_printk("job ring=%s, fence_context=%llu, id=%llu, depends fence=(context:%llu, seqno:%lld)", >> + __get_str(name), __entry->fence_context, __entry->id, >> + __entry->ctx, __entry->seqno) >> ); >> >> #endif
diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h b/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h index 0a19c121bda5..2d7f2252eb5d 100644 --- a/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h +++ b/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h @@ -58,9 +58,9 @@ DECLARE_EVENT_CLASS(drm_sched_job, &sched_job->sched->credit_count); __entry->dev_index = sched_job->sched->dev->primary->index; ), - TP_printk("entity=%p, id=%llu, fence=%p, ring=%s, job count:%u, hw job count:%d", - __entry->entity, __entry->id, - __entry->fence, __get_str(name), + TP_printk("id=%llu, fence=(context:%llu, seqno:%lld), ring=%s, job count:%u, hw job count:%d", + __entry->id, + __entry->fence->context, __entry->fence->seqno, __get_str(name), __entry->job_count, __entry->hw_job_count) ); @@ -72,9 +72,9 @@ DEFINE_EVENT(drm_sched_job, drm_sched_job, DEFINE_EVENT_PRINT(drm_sched_job, drm_run_job, TP_PROTO(struct drm_sched_job *sched_job, struct drm_sched_entity *entity), TP_ARGS(sched_job, entity), - TP_printk("dev_index=%d entity=%p id=%llu, fence=%p, ring=%s, job count:%u, hw job count:%d", - __entry->dev_index, __entry->entity, __entry->id, - __entry->fence, __get_str(name), + TP_printk("dev_index=%d id=%llu, fence=(context:%llu, seqno:%lld), ring=%s, job count:%u, hw job count:%d", + __entry->dev_index, __entry->id, + __entry->fence->context, __entry->fence->seqno, __get_str(name), __entry->job_count, __entry->hw_job_count) ); @@ -88,7 +88,8 @@ TRACE_EVENT(drm_sched_process_job, TP_fast_assign( __entry->fence = &fence->finished; ), - TP_printk("fence=%p signaled", __entry->fence) + TP_printk("fence=(context:%llu, seqno:%lld) signaled", + __entry->fence->context, __entry->fence->seqno) ); TRACE_EVENT(drm_sched_job_wait_dep, @@ -96,23 +97,25 @@ TRACE_EVENT(drm_sched_job_wait_dep, TP_ARGS(sched_job, fence), TP_STRUCT__entry( __string(name, sched_job->sched->name) + __field(uint64_t, fence_context) __field(uint64_t, id) __field(struct dma_fence *, fence) __field(uint64_t, ctx) - __field(unsigned, seqno) + __field(uint64_t, seqno) ), TP_fast_assign( __assign_str(name); + /* Store the hw exec fence context. */ + __entry->fence_context = sched_job->entity->fence_context + 1; __entry->id = sched_job->id; __entry->fence = fence; __entry->ctx = fence->context; __entry->seqno = fence->seqno; ), - TP_printk("job ring=%s, id=%llu, depends fence=%p, context=%llu, seq=%u", - __get_str(name), __entry->id, - __entry->fence, __entry->ctx, - __entry->seqno) + TP_printk("job ring=%s, fence_context=%llu, id=%llu, depends fence=(context:%llu, seqno:%lld)", + __get_str(name), __entry->fence_context, __entry->id, + __entry->ctx, __entry->seqno) ); #endif
Print identifiers instead of pointers: * "fence=%p" is replaced by "fence=(context:%llu, seqno:%lld)" to have a coherent way to print the fence. A possible follow up change would be to use the same format in traces/../dma-fence.h. * "entity=%p" is removed because the fence's context is already an identifier of the job owner. Signed-off-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com> --- .../gpu/drm/scheduler/gpu_scheduler_trace.h | 27 ++++++++++--------- 1 file changed, 15 insertions(+), 12 deletions(-)