diff mbox series

[1/2] drm/scheduler: Add job dependency trace.

Message ID 1533049014-6934-1-git-send-email-andrey.grodzovsky@amd.com (mailing list archive)
State New, archived
Headers show
Series [1/2] drm/scheduler: Add job dependency trace. | expand

Commit Message

Andrey Grodzovsky July 31, 2018, 2:56 p.m. UTC
During debug sessions I encountered a need to trace
back a job dependency a few steps back to the first failing
job. This trace helped me a lot.

Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>
---
 drivers/gpu/drm/scheduler/gpu_scheduler.c       |  8 ++++++--
 drivers/gpu/drm/scheduler/gpu_scheduler_trace.h | 24 ++++++++++++++++++++++++
 2 files changed, 30 insertions(+), 2 deletions(-)

Comments

Christian König July 31, 2018, 3:23 p.m. UTC | #1
Am 31.07.2018 um 16:56 schrieb Andrey Grodzovsky:
> During debug sessions I encountered a need to trace
> back a job dependecy a few steps back to the first failing
> job. This trace helpped me a lot.
>
> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>

Reviewed-by: Christian König <christian.koenig@amd.com> for the series.

> ---
>   drivers/gpu/drm/scheduler/gpu_scheduler.c       |  8 ++++++--
>   drivers/gpu/drm/scheduler/gpu_scheduler_trace.h | 24 ++++++++++++++++++++++++
>   2 files changed, 30 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler.c b/drivers/gpu/drm/scheduler/gpu_scheduler.c
> index 3f2fc5e..45703b9 100644
> --- a/drivers/gpu/drm/scheduler/gpu_scheduler.c
> +++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c
> @@ -506,9 +506,13 @@ drm_sched_entity_pop_job(struct drm_sched_entity *entity)
>   	if (!sched_job)
>   		return NULL;
>   
> -	while ((entity->dependency = sched->ops->dependency(sched_job, entity)))
> -		if (drm_sched_entity_add_dependency_cb(entity))
> +	while ((entity->dependency = sched->ops->dependency(sched_job, entity))) {
> +		if (drm_sched_entity_add_dependency_cb(entity)) {
> +
> +			trace_drm_sched_job_wait_dep(sched_job, entity->dependency);
>   			return NULL;
> +		}
> +	}
>   
>   	/* skip jobs from entity that marked guilty */
>   	if (entity->guilty && atomic_read(entity->guilty))
> diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h b/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h
> index 4998ad9..1626f39 100644
> --- a/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h
> +++ b/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h
> @@ -74,6 +74,30 @@ TRACE_EVENT(drm_sched_process_job,
>   	    TP_printk("fence=%p signaled", __entry->fence)
>   );
>   
> +TRACE_EVENT(drm_sched_job_wait_dep,
> +	    TP_PROTO(struct drm_sched_job *sched_job, struct dma_fence *fence),
> +	    TP_ARGS(sched_job, fence),
> +	    TP_STRUCT__entry(
> +			     __field(const char *,name)
> +			     __field(uint64_t, id)
> +			     __field(struct dma_fence *, fence)
> +			     __field(uint64_t, ctx)
> +			     __field(unsigned, seqno)
> +			     ),
> +
> +	    TP_fast_assign(
> +			   __entry->name = sched_job->sched->name;
> +			   __entry->id = sched_job->id;
> +			   __entry->fence = fence;
> +			   __entry->ctx = fence->context;
> +			   __entry->seqno = fence->seqno;
> +			   ),
> +	    TP_printk("job ring=%s, id=%llu, depends fence=%p, context=%llu, seq=%u",
> +		      __entry->name, __entry->id,
> +		      __entry->fence, __entry->ctx,
> +		      __entry->seqno)
> +);
> +
>   #endif
>   
>   /* This part must be outside protection */
Huang Rui Aug. 1, 2018, 6:30 a.m. UTC | #2
On Tue, Jul 31, 2018 at 10:56:53AM -0400, Andrey Grodzovsky wrote:
> During debug sessions I encountered a need to trace
> back a job dependecy a few steps back to the first failing
> job. This trace helpped me a lot.
> 
> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@amd.com>

Series are Reviewed-by: Huang Rui <ray.huang@amd.com>

> ---
>  drivers/gpu/drm/scheduler/gpu_scheduler.c       |  8 ++++++--
>  drivers/gpu/drm/scheduler/gpu_scheduler_trace.h | 24 ++++++++++++++++++++++++
>  2 files changed, 30 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler.c b/drivers/gpu/drm/scheduler/gpu_scheduler.c
> index 3f2fc5e..45703b9 100644
> --- a/drivers/gpu/drm/scheduler/gpu_scheduler.c
> +++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c
> @@ -506,9 +506,13 @@ drm_sched_entity_pop_job(struct drm_sched_entity *entity)
>  	if (!sched_job)
>  		return NULL;
>  
> -	while ((entity->dependency = sched->ops->dependency(sched_job, entity)))
> -		if (drm_sched_entity_add_dependency_cb(entity))
> +	while ((entity->dependency = sched->ops->dependency(sched_job, entity))) {
> +		if (drm_sched_entity_add_dependency_cb(entity)) {
> +
> +			trace_drm_sched_job_wait_dep(sched_job, entity->dependency);
>  			return NULL;
> +		}
> +	}
>  
>  	/* skip jobs from entity that marked guilty */
>  	if (entity->guilty && atomic_read(entity->guilty))
> diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h b/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h
> index 4998ad9..1626f39 100644
> --- a/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h
> +++ b/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h
> @@ -74,6 +74,30 @@ TRACE_EVENT(drm_sched_process_job,
>  	    TP_printk("fence=%p signaled", __entry->fence)
>  );
>  
> +TRACE_EVENT(drm_sched_job_wait_dep,
> +	    TP_PROTO(struct drm_sched_job *sched_job, struct dma_fence *fence),
> +	    TP_ARGS(sched_job, fence),
> +	    TP_STRUCT__entry(
> +			     __field(const char *,name)
> +			     __field(uint64_t, id)
> +			     __field(struct dma_fence *, fence)
> +			     __field(uint64_t, ctx)
> +			     __field(unsigned, seqno)
> +			     ),
> +
> +	    TP_fast_assign(
> +			   __entry->name = sched_job->sched->name;
> +			   __entry->id = sched_job->id;
> +			   __entry->fence = fence;
> +			   __entry->ctx = fence->context;
> +			   __entry->seqno = fence->seqno;
> +			   ),
> +	    TP_printk("job ring=%s, id=%llu, depends fence=%p, context=%llu, seq=%u",
> +		      __entry->name, __entry->id,
> +		      __entry->fence, __entry->ctx,
> +		      __entry->seqno)
> +);
> +
>  #endif
>  
>  /* This part must be outside protection */
> -- 
> 2.7.4
> 
> _______________________________________________
> amd-gfx mailing list
> amd-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
diff mbox series

Patch

diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler.c b/drivers/gpu/drm/scheduler/gpu_scheduler.c
index 3f2fc5e..45703b9 100644
--- a/drivers/gpu/drm/scheduler/gpu_scheduler.c
+++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c
@@ -506,9 +506,13 @@  drm_sched_entity_pop_job(struct drm_sched_entity *entity)
 	if (!sched_job)
 		return NULL;
 
-	while ((entity->dependency = sched->ops->dependency(sched_job, entity)))
-		if (drm_sched_entity_add_dependency_cb(entity))
+	while ((entity->dependency = sched->ops->dependency(sched_job, entity))) {
+		if (drm_sched_entity_add_dependency_cb(entity)) {
+
+			trace_drm_sched_job_wait_dep(sched_job, entity->dependency);
 			return NULL;
+		}
+	}
 
 	/* skip jobs from entity that marked guilty */
 	if (entity->guilty && atomic_read(entity->guilty))
diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h b/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h
index 4998ad9..1626f39 100644
--- a/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h
+++ b/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h
@@ -74,6 +74,30 @@  TRACE_EVENT(drm_sched_process_job,
 	    TP_printk("fence=%p signaled", __entry->fence)
 );
 
+TRACE_EVENT(drm_sched_job_wait_dep,
+	    TP_PROTO(struct drm_sched_job *sched_job, struct dma_fence *fence),
+	    TP_ARGS(sched_job, fence),
+	    TP_STRUCT__entry(
+			     __field(const char *,name)
+			     __field(uint64_t, id)
+			     __field(struct dma_fence *, fence)
+			     __field(uint64_t, ctx)
+			     __field(unsigned, seqno)
+			     ),
+
+	    TP_fast_assign(
+			   __entry->name = sched_job->sched->name;
+			   __entry->id = sched_job->id;
+			   __entry->fence = fence;
+			   __entry->ctx = fence->context;
+			   __entry->seqno = fence->seqno;
+			   ),
+	    TP_printk("job ring=%s, id=%llu, depends fence=%p, context=%llu, seq=%u",
+		      __entry->name, __entry->id,
+		      __entry->fence, __entry->ctx,
+		      __entry->seqno)
+);
+
 #endif
 
 /* This part must be outside protection */