@@ -26,15 +26,41 @@
#include <linux/stringify.h>
#include <linux/types.h>
+#include <linux/trace_seq.h>
#include <linux/tracepoint.h>
#undef TRACE_SYSTEM
#define TRACE_SYSTEM gpu_scheduler
#define TRACE_INCLUDE_FILE gpu_scheduler_trace
+#ifndef __TRACE_EVENT_GPU_SCHEDULER_PRINT_FN
+#define __TRACE_EVENT_GPU_SCHEDULER_PRINT_FN
+/* Similar to trace_print_array_seq but for fences. */
+static inline const char *__print_dma_fence_array(struct trace_seq *p, const void *buf, int count)
+{
+ const char *ret = trace_seq_buffer_ptr(p);
+ u64 *fences = (u64 *) buf;
+ const char *prefix = "";
+
+ trace_seq_putc(p, '{');
+ for (int i = 0; i < count; i++) {
+ u64 context = fences[2 * i], seqno = fences[2 * i + 1];
+
+ trace_seq_printf(p, "%s(context:%llu, seqno:%lld)",
+ prefix, context, seqno);
+ prefix = ",";
+ }
+ trace_seq_putc(p, '}');
+ trace_seq_putc(p, 0);
+
+ return ret;
+}
+#endif
+
DECLARE_EVENT_CLASS(drm_sched_job,
- TP_PROTO(struct drm_sched_job *sched_job, struct drm_sched_entity *entity),
- TP_ARGS(sched_job, entity),
+ TP_PROTO(struct drm_sched_job *sched_job, struct drm_sched_entity *entity,
+ unsigned int dep_count),
+ TP_ARGS(sched_job, entity, dep_count),
TP_STRUCT__entry(
__field(struct drm_sched_entity *, entity)
__string(name, sched_job->sched->name)
@@ -44,9 +70,14 @@ DECLARE_EVENT_CLASS(drm_sched_job,
__string(dev, dev_name(sched_job->sched->dev))
__field(uint64_t, fence_context)
__field(uint64_t, fence_seqno)
+ __field(int, n_deps)
+ __dynamic_array(u64, deps, dep_count * 2)
),
TP_fast_assign(
+ unsigned long idx;
+ struct dma_fence *fence;
+ u64 *dyn_arr;
__entry->entity = entity;
__entry->id = sched_job->id;
__assign_str(name);
@@ -56,22 +87,32 @@ DECLARE_EVENT_CLASS(drm_sched_job,
__assign_str(dev);
__entry->fence_context = sched_job->s_fence->finished.context;
__entry->fence_seqno = sched_job->s_fence->finished.seqno;
-
+ __entry->n_deps = dep_count;
+ if (dep_count) {
+ dyn_arr = __get_dynamic_array(deps);
+ xa_for_each(&sched_job->dependencies, idx, fence) {
+ dyn_arr[2 * idx] = fence->context;
+ dyn_arr[2 * idx + 1] = fence->seqno;
+ }
+ }
),
- TP_printk("id=%llu, fence=(context:%llu, seqno:%lld), ring=%s, job count:%u, hw job count:%d",
+ TP_printk("id=%llu, fence=(context:%llu, seqno:%lld), ring=%s, job count:%u, hw job count:%d, dependencies:%s",
__entry->id,
__entry->fence_context, __entry->fence_seqno, __get_str(name),
- __entry->job_count, __entry->hw_job_count)
+ __entry->job_count, __entry->hw_job_count,
+ __print_dma_fence_array(p, __get_dynamic_array(deps), __entry->n_deps))
);
DEFINE_EVENT(drm_sched_job, drm_sched_job,
- TP_PROTO(struct drm_sched_job *sched_job, struct drm_sched_entity *entity),
- TP_ARGS(sched_job, entity)
+ TP_PROTO(struct drm_sched_job *sched_job, struct drm_sched_entity *entity,
+ unsigned int dep_count),
+ TP_ARGS(sched_job, entity, dep_count)
);
DEFINE_EVENT_PRINT(drm_sched_job, drm_run_job,
- TP_PROTO(struct drm_sched_job *sched_job, struct drm_sched_entity *entity),
- TP_ARGS(sched_job, entity),
+ TP_PROTO(struct drm_sched_job *sched_job, struct drm_sched_entity *entity,
+ unsigned int dep_count),
+ TP_ARGS(sched_job, entity, 0),
TP_printk("dev=%s, id=%llu, fence=(context:%llu, seqno:%lld), ring=%s, job count:%u, hw job count:%d",
__get_str(dev), __entry->id,
__entry->fence_context, __entry->fence_seqno, __get_str(name),
@@ -583,7 +583,13 @@ void drm_sched_entity_push_job(struct drm_sched_job *sched_job)
bool first;
ktime_t submit_ts;
- trace_drm_sched_job(sched_job, entity);
+ if (trace_drm_sched_job_enabled()) {
+ unsigned long index;
+ void *f;
+
+ xa_for_each(&sched_job->dependencies, index, f) { }
+ trace_drm_sched_job(sched_job, entity, index);
+ }
atomic_inc(entity->rq->sched->score);
WRITE_ONCE(entity->last_user, current->group_leader);
@@ -1201,7 +1201,7 @@ static void drm_sched_run_job_work(struct work_struct *w)
atomic_add(sched_job->credits, &sched->credit_count);
drm_sched_job_begin(sched_job);
- trace_drm_run_job(sched_job, entity);
+ trace_drm_run_job(sched_job, entity, 0);
fence = sched->ops->run_job(sched_job);
complete_all(&entity->entity_idle);
drm_sched_fence_scheduled(s_fence, fence);
Trace the fence dependencies similarly to how we print fences: ... , dependencies:{(context:606, seqno:38006)} This allows tools to analyze the dependencies between the jobs (previously it was only possible for fences traced by drm_sched_job_wait_dep). Since drm_sched_job and drm_run_job use the same base event class, the caller of trace_drm_run_job have to pass a dep_count of 0 (which is ignored because dependencies are only considered at submit time). Signed-off-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com> --- .../gpu/drm/scheduler/gpu_scheduler_trace.h | 59 ++++++++++++++++--- drivers/gpu/drm/scheduler/sched_entity.c | 8 ++- drivers/gpu/drm/scheduler/sched_main.c | 2 +- 3 files changed, 58 insertions(+), 11 deletions(-)