diff mbox series

[v3,3/4] drm/sched: cleanup gpu_scheduler trace events

Message ID 20240606130629.214827-4-pierre-eric.pelloux-prayer@amd.com (mailing list archive)
State New, archived
Headers show
Series Improve gpu_scheduler trace events | expand

Commit Message

Pierre-Eric Pelloux-Prayer June 6, 2024, 1:06 p.m. UTC
Print identifiers instead of pointers:
* "fence=%p" is replaced by "fence=(context:%llu, seqno:%lld)" to have a
coherent way to print the fence. A possible follow up change would be
to use the same format in traces/../dma-fence.h.
* "entity=%p" is removed because the fence's context is already an
identifier of the job owner.

Signed-off-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
---
 .../gpu/drm/scheduler/gpu_scheduler_trace.h   | 27 ++++++++++---------
 1 file changed, 15 insertions(+), 12 deletions(-)

Comments

Steven Rostedt June 6, 2024, 1:19 p.m. UTC | #1
On Thu, 6 Jun 2024 15:06:24 +0200
Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com> wrote:

> Print identifiers instead of pointers:
> * "fence=%p" is replaced by "fence=(context:%llu, seqno:%lld)" to have a
> coherent way to print the fence. A possible follow up change would be
> to use the same format in traces/../dma-fence.h.
> * "entity=%p" is removed because the fence's context is already an
> identifier of the job owner.
> 
> Signed-off-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
> ---
>  .../gpu/drm/scheduler/gpu_scheduler_trace.h   | 27 ++++++++++---------
>  1 file changed, 15 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h b/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h
> index 0a19c121bda5..2d7f2252eb5d 100644
> --- a/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h
> +++ b/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h
> @@ -58,9 +58,9 @@ DECLARE_EVENT_CLASS(drm_sched_job,
>  				   &sched_job->sched->credit_count);
>  			   __entry->dev_index = sched_job->sched->dev->primary->index;
>  			   ),
> -	    TP_printk("entity=%p, id=%llu, fence=%p, ring=%s, job count:%u, hw job count:%d",
> -		      __entry->entity, __entry->id,
> -		      __entry->fence, __get_str(name),
> +	    TP_printk("id=%llu, fence=(context:%llu, seqno:%lld), ring=%s, job count:%u, hw job count:%d",
> +		      __entry->id,
> +		      __entry->fence->context, __entry->fence->seqno, __get_str(name),
>  		      __entry->job_count, __entry->hw_job_count)
>  );
>  
> @@ -72,9 +72,9 @@ DEFINE_EVENT(drm_sched_job, drm_sched_job,
>  DEFINE_EVENT_PRINT(drm_sched_job, drm_run_job,
>  	    TP_PROTO(struct drm_sched_job *sched_job, struct drm_sched_entity *entity),
>  	    TP_ARGS(sched_job, entity),
> -	    TP_printk("dev_index=%d entity=%p id=%llu, fence=%p, ring=%s, job count:%u, hw job count:%d",
> -		      __entry->dev_index, __entry->entity, __entry->id,
> -		      __entry->fence, __get_str(name),
> +	    TP_printk("dev_index=%d id=%llu, fence=(context:%llu, seqno:%lld), ring=%s, job count:%u, hw job count:%d",
> +		      __entry->dev_index, __entry->id,
> +		      __entry->fence->context, __entry->fence->seqno, __get_str(name),
>  		      __entry->job_count, __entry->hw_job_count)

NACK!

You can't dereference pointers from TP_printk(). This is called seconds,
minutes, hours, even days or months after that pointer was assigned. How do
you know that pointer still points to anything?

-- Steve


>  );
>  
> @@ -88,7 +88,8 @@ TRACE_EVENT(drm_sched_process_job,
>  	    TP_fast_assign(
>  		    __entry->fence = &fence->finished;
>  		    ),
> -	    TP_printk("fence=%p signaled", __entry->fence)
> +	    TP_printk("fence=(context:%llu, seqno:%lld) signaled",
> +		      __entry->fence->context, __entry->fence->seqno)
>  );
>  
>  TRACE_EVENT(drm_sched_job_wait_dep,
> @@ -96,23 +97,25 @@ TRACE_EVENT(drm_sched_job_wait_dep,
>  	    TP_ARGS(sched_job, fence),
>  	    TP_STRUCT__entry(
>  			     __string(name, sched_job->sched->name)
> +			     __field(uint64_t, fence_context)
>  			     __field(uint64_t, id)
>  			     __field(struct dma_fence *, fence)
>  			     __field(uint64_t, ctx)
> -			     __field(unsigned, seqno)
> +			     __field(uint64_t, seqno)
>  			     ),
>  
>  	    TP_fast_assign(
>  			   __assign_str(name);
> +			   /* Store the hw exec fence context. */
> +			   __entry->fence_context = sched_job->entity->fence_context + 1;
>  			   __entry->id = sched_job->id;
>  			   __entry->fence = fence;
>  			   __entry->ctx = fence->context;
>  			   __entry->seqno = fence->seqno;
>  			   ),
> -	    TP_printk("job ring=%s, id=%llu, depends fence=%p, context=%llu, seq=%u",
> -		      __get_str(name), __entry->id,
> -		      __entry->fence, __entry->ctx,
> -		      __entry->seqno)
> +	    TP_printk("job ring=%s, fence_context=%llu, id=%llu, depends fence=(context:%llu, seqno:%lld)",
> +		      __get_str(name), __entry->fence_context, __entry->id,
> +		      __entry->ctx, __entry->seqno)
>  );
>  
>  #endif
Christian König June 6, 2024, 1:23 p.m. UTC | #2
Am 06.06.24 um 15:19 schrieb Steven Rostedt:
> On Thu, 6 Jun 2024 15:06:24 +0200
> Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com> wrote:
>
>> Print identifiers instead of pointers:
>> * "fence=%p" is replaced by "fence=(context:%llu, seqno:%lld)" to have a
>> coherent way to print the fence. A possible follow up change would be
>> to use the same format in traces/../dma-fence.h.
>> * "entity=%p" is removed because the fence's context is already an
>> identifier of the job owner.
>>
>> Signed-off-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
>> ---
>>   .../gpu/drm/scheduler/gpu_scheduler_trace.h   | 27 ++++++++++---------
>>   1 file changed, 15 insertions(+), 12 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h b/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h
>> index 0a19c121bda5..2d7f2252eb5d 100644
>> --- a/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h
>> +++ b/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h
>> @@ -58,9 +58,9 @@ DECLARE_EVENT_CLASS(drm_sched_job,
>>   				   &sched_job->sched->credit_count);
>>   			   __entry->dev_index = sched_job->sched->dev->primary->index;
>>   			   ),
>> -	    TP_printk("entity=%p, id=%llu, fence=%p, ring=%s, job count:%u, hw job count:%d",
>> -		      __entry->entity, __entry->id,
>> -		      __entry->fence, __get_str(name),
>> +	    TP_printk("id=%llu, fence=(context:%llu, seqno:%lld), ring=%s, job count:%u, hw job count:%d",
>> +		      __entry->id,
>> +		      __entry->fence->context, __entry->fence->seqno, __get_str(name),
>>   		      __entry->job_count, __entry->hw_job_count)
>>   );
>>   
>> @@ -72,9 +72,9 @@ DEFINE_EVENT(drm_sched_job, drm_sched_job,
>>   DEFINE_EVENT_PRINT(drm_sched_job, drm_run_job,
>>   	    TP_PROTO(struct drm_sched_job *sched_job, struct drm_sched_entity *entity),
>>   	    TP_ARGS(sched_job, entity),
>> -	    TP_printk("dev_index=%d entity=%p id=%llu, fence=%p, ring=%s, job count:%u, hw job count:%d",
>> -		      __entry->dev_index, __entry->entity, __entry->id,
>> -		      __entry->fence, __get_str(name),
>> +	    TP_printk("dev_index=%d id=%llu, fence=(context:%llu, seqno:%lld), ring=%s, job count:%u, hw job count:%d",
>> +		      __entry->dev_index, __entry->id,
>> +		      __entry->fence->context, __entry->fence->seqno, __get_str(name),
>>   		      __entry->job_count, __entry->hw_job_count)
> NACK!
>
> You can't dereference pointers from TP_printk(). This is called seconds,
> minutes, hours, even days or months after that pointer was assigned. How do
> you know that pointer still points to anything?

Yeah, just wanted to reply the same thing. That is a really really bad idea.

You could in theory grab a reference to the fence, but we usually try to 
avoid that as well since it prevents modules from unloading.

Rather move the context and seqno directly as values into the trace event.

Christian.

>
> -- Steve
>
>
>>   );
>>   
>> @@ -88,7 +88,8 @@ TRACE_EVENT(drm_sched_process_job,
>>   	    TP_fast_assign(
>>   		    __entry->fence = &fence->finished;
>>   		    ),
>> -	    TP_printk("fence=%p signaled", __entry->fence)
>> +	    TP_printk("fence=(context:%llu, seqno:%lld) signaled",
>> +		      __entry->fence->context, __entry->fence->seqno)
>>   );
>>   
>>   TRACE_EVENT(drm_sched_job_wait_dep,
>> @@ -96,23 +97,25 @@ TRACE_EVENT(drm_sched_job_wait_dep,
>>   	    TP_ARGS(sched_job, fence),
>>   	    TP_STRUCT__entry(
>>   			     __string(name, sched_job->sched->name)
>> +			     __field(uint64_t, fence_context)
>>   			     __field(uint64_t, id)
>>   			     __field(struct dma_fence *, fence)
>>   			     __field(uint64_t, ctx)
>> -			     __field(unsigned, seqno)
>> +			     __field(uint64_t, seqno)
>>   			     ),
>>   
>>   	    TP_fast_assign(
>>   			   __assign_str(name);
>> +			   /* Store the hw exec fence context. */
>> +			   __entry->fence_context = sched_job->entity->fence_context + 1;
>>   			   __entry->id = sched_job->id;
>>   			   __entry->fence = fence;
>>   			   __entry->ctx = fence->context;
>>   			   __entry->seqno = fence->seqno;
>>   			   ),
>> -	    TP_printk("job ring=%s, id=%llu, depends fence=%p, context=%llu, seq=%u",
>> -		      __get_str(name), __entry->id,
>> -		      __entry->fence, __entry->ctx,
>> -		      __entry->seqno)
>> +	    TP_printk("job ring=%s, fence_context=%llu, id=%llu, depends fence=(context:%llu, seqno:%lld)",
>> +		      __get_str(name), __entry->fence_context, __entry->id,
>> +		      __entry->ctx, __entry->seqno)
>>   );
>>   
>>   #endif
Pierre-Eric Pelloux-Prayer June 7, 2024, 1:21 p.m. UTC | #3
Hi,

Le 06/06/2024 à 15:19, Steven Rostedt a écrit :
> On Thu, 6 Jun 2024 15:06:24 +0200
> Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com> wrote:
> 
>> Print identifiers instead of pointers:
>> * "fence=%p" is replaced by "fence=(context:%llu, seqno:%lld)" to have a
>> coherent way to print the fence. A possible follow up change would be
>> to use the same format in traces/../dma-fence.h.
>> * "entity=%p" is removed because the fence's context is already an
>> identifier of the job owner.
>>
>> Signed-off-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
>> ---
>>   .../gpu/drm/scheduler/gpu_scheduler_trace.h   | 27 ++++++++++---------
>>   1 file changed, 15 insertions(+), 12 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h b/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h
>> index 0a19c121bda5..2d7f2252eb5d 100644
>> --- a/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h
>> +++ b/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h
>> @@ -58,9 +58,9 @@ DECLARE_EVENT_CLASS(drm_sched_job,
>>   				   &sched_job->sched->credit_count);
>>   			   __entry->dev_index = sched_job->sched->dev->primary->index;
>>   			   ),
>> -	    TP_printk("entity=%p, id=%llu, fence=%p, ring=%s, job count:%u, hw job count:%d",
>> -		      __entry->entity, __entry->id,
>> -		      __entry->fence, __get_str(name),
>> +	    TP_printk("id=%llu, fence=(context:%llu, seqno:%lld), ring=%s, job count:%u, hw job count:%d",
>> +		      __entry->id,
>> +		      __entry->fence->context, __entry->fence->seqno, __get_str(name),
>>   		      __entry->job_count, __entry->hw_job_count)
>>   );
>>   
>> @@ -72,9 +72,9 @@ DEFINE_EVENT(drm_sched_job, drm_sched_job,
>>   DEFINE_EVENT_PRINT(drm_sched_job, drm_run_job,
>>   	    TP_PROTO(struct drm_sched_job *sched_job, struct drm_sched_entity *entity),
>>   	    TP_ARGS(sched_job, entity),
>> -	    TP_printk("dev_index=%d entity=%p id=%llu, fence=%p, ring=%s, job count:%u, hw job count:%d",
>> -		      __entry->dev_index, __entry->entity, __entry->id,
>> -		      __entry->fence, __get_str(name),
>> +	    TP_printk("dev_index=%d id=%llu, fence=(context:%llu, seqno:%lld), ring=%s, job count:%u, hw job count:%d",
>> +		      __entry->dev_index, __entry->id,
>> +		      __entry->fence->context, __entry->fence->seqno, __get_str(name),
>>   		      __entry->job_count, __entry->hw_job_count)
> 
> NACK!
> 
> You can't dereference pointers from TP_printk(). This is called seconds,
> minutes, hours, even days or months after that pointer was assigned. How do
> you know that pointer still points to anything?

Now that you pointed it out, the problem is obvious indeed.
I have fixed it locally.

Thanks!
Pierre-Eric

> 
> -- Steve
> 
> 
>>   );
>>   
>> @@ -88,7 +88,8 @@ TRACE_EVENT(drm_sched_process_job,
>>   	    TP_fast_assign(
>>   		    __entry->fence = &fence->finished;
>>   		    ),
>> -	    TP_printk("fence=%p signaled", __entry->fence)
>> +	    TP_printk("fence=(context:%llu, seqno:%lld) signaled",
>> +		      __entry->fence->context, __entry->fence->seqno)
>>   );
>>   
>>   TRACE_EVENT(drm_sched_job_wait_dep,
>> @@ -96,23 +97,25 @@ TRACE_EVENT(drm_sched_job_wait_dep,
>>   	    TP_ARGS(sched_job, fence),
>>   	    TP_STRUCT__entry(
>>   			     __string(name, sched_job->sched->name)
>> +			     __field(uint64_t, fence_context)
>>   			     __field(uint64_t, id)
>>   			     __field(struct dma_fence *, fence)
>>   			     __field(uint64_t, ctx)
>> -			     __field(unsigned, seqno)
>> +			     __field(uint64_t, seqno)
>>   			     ),
>>   
>>   	    TP_fast_assign(
>>   			   __assign_str(name);
>> +			   /* Store the hw exec fence context. */
>> +			   __entry->fence_context = sched_job->entity->fence_context + 1;
>>   			   __entry->id = sched_job->id;
>>   			   __entry->fence = fence;
>>   			   __entry->ctx = fence->context;
>>   			   __entry->seqno = fence->seqno;
>>   			   ),
>> -	    TP_printk("job ring=%s, id=%llu, depends fence=%p, context=%llu, seq=%u",
>> -		      __get_str(name), __entry->id,
>> -		      __entry->fence, __entry->ctx,
>> -		      __entry->seqno)
>> +	    TP_printk("job ring=%s, fence_context=%llu, id=%llu, depends fence=(context:%llu, seqno:%lld)",
>> +		      __get_str(name), __entry->fence_context, __entry->id,
>> +		      __entry->ctx, __entry->seqno)
>>   );
>>   
>>   #endif
diff mbox series

Patch

diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h b/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h
index 0a19c121bda5..2d7f2252eb5d 100644
--- a/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h
+++ b/drivers/gpu/drm/scheduler/gpu_scheduler_trace.h
@@ -58,9 +58,9 @@  DECLARE_EVENT_CLASS(drm_sched_job,
 				   &sched_job->sched->credit_count);
 			   __entry->dev_index = sched_job->sched->dev->primary->index;
 			   ),
-	    TP_printk("entity=%p, id=%llu, fence=%p, ring=%s, job count:%u, hw job count:%d",
-		      __entry->entity, __entry->id,
-		      __entry->fence, __get_str(name),
+	    TP_printk("id=%llu, fence=(context:%llu, seqno:%lld), ring=%s, job count:%u, hw job count:%d",
+		      __entry->id,
+		      __entry->fence->context, __entry->fence->seqno, __get_str(name),
 		      __entry->job_count, __entry->hw_job_count)
 );
 
@@ -72,9 +72,9 @@  DEFINE_EVENT(drm_sched_job, drm_sched_job,
 DEFINE_EVENT_PRINT(drm_sched_job, drm_run_job,
 	    TP_PROTO(struct drm_sched_job *sched_job, struct drm_sched_entity *entity),
 	    TP_ARGS(sched_job, entity),
-	    TP_printk("dev_index=%d entity=%p id=%llu, fence=%p, ring=%s, job count:%u, hw job count:%d",
-		      __entry->dev_index, __entry->entity, __entry->id,
-		      __entry->fence, __get_str(name),
+	    TP_printk("dev_index=%d id=%llu, fence=(context:%llu, seqno:%lld), ring=%s, job count:%u, hw job count:%d",
+		      __entry->dev_index, __entry->id,
+		      __entry->fence->context, __entry->fence->seqno, __get_str(name),
 		      __entry->job_count, __entry->hw_job_count)
 );
 
@@ -88,7 +88,8 @@  TRACE_EVENT(drm_sched_process_job,
 	    TP_fast_assign(
 		    __entry->fence = &fence->finished;
 		    ),
-	    TP_printk("fence=%p signaled", __entry->fence)
+	    TP_printk("fence=(context:%llu, seqno:%lld) signaled",
+		      __entry->fence->context, __entry->fence->seqno)
 );
 
 TRACE_EVENT(drm_sched_job_wait_dep,
@@ -96,23 +97,25 @@  TRACE_EVENT(drm_sched_job_wait_dep,
 	    TP_ARGS(sched_job, fence),
 	    TP_STRUCT__entry(
 			     __string(name, sched_job->sched->name)
+			     __field(uint64_t, fence_context)
 			     __field(uint64_t, id)
 			     __field(struct dma_fence *, fence)
 			     __field(uint64_t, ctx)
-			     __field(unsigned, seqno)
+			     __field(uint64_t, seqno)
 			     ),
 
 	    TP_fast_assign(
 			   __assign_str(name);
+			   /* Store the hw exec fence context. */
+			   __entry->fence_context = sched_job->entity->fence_context + 1;
 			   __entry->id = sched_job->id;
 			   __entry->fence = fence;
 			   __entry->ctx = fence->context;
 			   __entry->seqno = fence->seqno;
 			   ),
-	    TP_printk("job ring=%s, id=%llu, depends fence=%p, context=%llu, seq=%u",
-		      __get_str(name), __entry->id,
-		      __entry->fence, __entry->ctx,
-		      __entry->seqno)
+	    TP_printk("job ring=%s, fence_context=%llu, id=%llu, depends fence=(context:%llu, seqno:%lld)",
+		      __get_str(name), __entry->fence_context, __entry->id,
+		      __entry->ctx, __entry->seqno)
 );
 
 #endif