diff mbox series

[i-g-t,01/21] scripts/trace.pl: Fix after intel_engine_notify removal

Message ID 20190508121058.27038-2-tvrtko.ursulin@linux.intel.com (mailing list archive)
State New, archived
Headers show
Series Media scalability tooling | expand

Commit Message

Tvrtko Ursulin May 8, 2019, 12:10 p.m. UTC
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

After the removal of engine global seqnos and the corresponding
intel_engine_notify tracepoints the script needs to be adjusted to cope
with the new state of things.

To keep working it switches over using the dma_fence:dma_fence_signaled:
tracepoint and keeps one extra internal map to connect the ctx-seqno pairs
with engines.

It also needs to key the completion events on the full engine/ctx/seqno
tokens, and adjust correspondingly the timeline sorting logic.

v2:
 * Do not use late notifications (received after context complete) when
   splitting up coalesced requests. They are now much more likely and can
   not be used.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
 scripts/trace.pl | 82 ++++++++++++++++++++++++------------------------
 1 file changed, 41 insertions(+), 41 deletions(-)

Comments

Chris Wilson May 8, 2019, 12:17 p.m. UTC | #1
Quoting Tvrtko Ursulin (2019-05-08 13:10:38)
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> 
> After the removal of engine global seqnos and the corresponding
> intel_engine_notify tracepoints the script needs to be adjusted to cope
> with the new state of things.
> 
> To keep working it switches over using the dma_fence:dma_fence_signaled:
> tracepoint and keeps one extra internal map to connect the ctx-seqno pairs
> with engines.

Is the map suitable for the planned (by me)

	s/i915_request_wait_begin/dma_fence_wait_begin/

I guess it should be.
-Chris
Tvrtko Ursulin May 9, 2019, 9:27 a.m. UTC | #2
On 08/05/2019 13:17, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2019-05-08 13:10:38)
>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>
>> After the removal of engine global seqnos and the corresponding
>> intel_engine_notify tracepoints the script needs to be adjusted to cope
>> with the new state of things.
>>
>> To keep working it switches over using the dma_fence:dma_fence_signaled:
>> tracepoint and keeps one extra internal map to connect the ctx-seqno pairs
>> with engines.
> 
> Is the map suitable for the planned (by me)
> 
> 	s/i915_request_wait_begin/dma_fence_wait_begin/
> 
> I guess it should be.

I think it would be workable. One complication would be that engine is 
not guaranteed to be known ahead of the wait, like it is ahead of the 
signal. But since ctx.seqno is unique it can be tracked and added later 
I think.

Regards,

Tvrtko
Chris Wilson May 10, 2019, 12:33 p.m. UTC | #3
Quoting Tvrtko Ursulin (2019-05-08 13:10:38)
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> 
> After the removal of engine global seqnos and the corresponding
> intel_engine_notify tracepoints the script needs to be adjusted to cope
> with the new state of things.
> 
> To keep working it switches over using the dma_fence:dma_fence_signaled:
> tracepoint and keeps one extra internal map to connect the ctx-seqno pairs
> with engines.
> 
> It also needs to key the completion events on the full engine/ctx/seqno
> tokens, and adjust correspondingly the timeline sorting logic.
> 
> v2:
>  * Do not use late notifications (received after context complete) when
>    splitting up coalesced requests. They are now much more likely and can
>    not be used.
> 
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
>  scripts/trace.pl | 82 ++++++++++++++++++++++++------------------------
>  1 file changed, 41 insertions(+), 41 deletions(-)
> 
> diff --git a/scripts/trace.pl b/scripts/trace.pl
> index 18f9f3b18396..95dc3a645e8e 100755
> --- a/scripts/trace.pl
> +++ b/scripts/trace.pl
> @@ -27,7 +27,8 @@ use warnings;
>  use 5.010;
>  
>  my $gid = 0;
> -my (%db, %queue, %submit, %notify, %rings, %ctxdb, %ringmap, %reqwait, %ctxtimelines);
> +my (%db, %queue, %submit, %notify, %rings, %ctxdb, %ringmap, %reqwait,
> +    %ctxtimelines, %ctxengines);
>  my @freqs;

So what's ctxengines? Or rings for that matter?

I take it ctxengines is really the last engine which we saw this context
execute on?

>  
>  my $max_items = 3000;
> @@ -66,7 +67,7 @@ Notes:
>                                i915:i915_request_submit, \
>                                i915:i915_request_in, \
>                                i915:i915_request_out, \
> -                              i915:intel_engine_notify, \
> +                              dma_fence:dma_fence_signaled, \
>                                i915:i915_request_wait_begin, \
>                                i915:i915_request_wait_end \
>                                [command-to-be-profiled]
> @@ -161,7 +162,7 @@ sub arg_trace
>                        'i915:i915_request_submit',
>                        'i915:i915_request_in',
>                        'i915:i915_request_out',
> -                      'i915:intel_engine_notify',
> +                      'dma_fence:dma_fence_signaled',
>                        'i915:i915_request_wait_begin',
>                        'i915:i915_request_wait_end' );
>  
> @@ -312,13 +313,6 @@ sub db_key
>         return $ring . '/' . $ctx . '/' . $seqno;
>  }
>  
> -sub global_key
> -{
> -       my ($ring, $seqno) = @_;
> -
> -       return $ring . '/' . $seqno;
> -}
> -
>  sub sanitize_ctx
>  {
>         my ($ctx, $ring) = @_;
> @@ -419,6 +413,8 @@ while (<>) {
>                 $req{'ring'} = $ring;
>                 $req{'seqno'} = $seqno;
>                 $req{'ctx'} = $ctx;
> +               die if exists $ctxengines{$ctx} and $ctxengines{$ctx} ne $ring;
> +               $ctxengines{$ctx} = $ring;
>                 $ctxtimelines{$ctx . '/' . $ring} = 1;
>                 $req{'name'} = $ctx . '/' . $seqno;
>                 $req{'global'} = $tp{'global'};
> @@ -429,16 +425,29 @@ while (<>) {
>                 $ringmap{$rings{$ring}} = $ring;
>                 $db{$key} = \%req;
>         } elsif ($tp_name eq 'i915:i915_request_out:') {
> -               my $gkey = global_key($ring, $tp{'global'});
> +               my $gkey;
> +

# Must be paired with a previous i915_request_in
> +               die unless exists $ctxengines{$ctx};

I'd suggest next unless, because there's always a change the capture is
started part way though someone's workload.

> +               $gkey = db_key($ctxengines{$ctx}, $ctx, $seqno);
> +
> +               if ($tp{'completed?'}) {
> +                       die unless exists $db{$key};
> +                       die unless exists $db{$key}->{'start'};
> +                       die if exists $db{$key}->{'end'};
> +
> +                       $db{$key}->{'end'} = $time;
> +                       $db{$key}->{'notify'} = $notify{$gkey}
> +                                               if exists $notify{$gkey};

Hmm. With preempt-to-busy, a request can complete when we are no longer
tracking it (it completes before we preempt it).

They will still get the schedule-out tracepoint, but marked as
incomplete, and there will be a signaled tp later before we try and
resubmit.

> +               } else {
> +                       delete $db{$key};
> +               }
> +       } elsif ($tp_name eq 'dma_fence:dma_fence_signaled:') {
> +               my $gkey;
>  
> -               die unless exists $db{$key};
> -               die unless exists $db{$key}->{'start'};
> -               die if exists $db{$key}->{'end'};
> +               die unless exists $ctxengines{$tp{'context'}};
>  
> -               $db{$key}->{'end'} = $time;
> -               $db{$key}->{'notify'} = $notify{$gkey} if exists $notify{$gkey};
> -       } elsif ($tp_name eq 'i915:intel_engine_notify:') {
> -               my $gkey = global_key($ring, $seqno);
> +               $gkey = db_key($ctxengines{$tp{'context'}}, $tp{'context'}, $tp{'seqno'});
>  
>                 $notify{$gkey} = $time unless exists $notify{$gkey};
>         } elsif ($tp_name eq 'i915:intel_gpu_freq_change:') {
> @@ -452,7 +461,7 @@ while (<>) {
>  # find the largest seqno to be used for timeline sorting purposes.
>  my $max_seqno = 0;
>  foreach my $key (keys %db) {
> -       my $gkey = global_key($db{$key}->{'ring'}, $db{$key}->{'global'});
> +       my $gkey = db_key($db{$key}->{'ring'}, $db{$key}->{'ctx'}, $db{$key}->{'seqno'});
>  
>         die unless exists $db{$key}->{'start'};
>  
> @@ -478,14 +487,13 @@ my $key_count = scalar(keys %db);
>  
>  my %engine_timelines;
>  
> -sub sortEngine {
> -       my $as = $db{$a}->{'global'};
> -       my $bs = $db{$b}->{'global'};
> +sub sortStart {
> +       my $as = $db{$a}->{'start'};
> +       my $bs = $db{$b}->{'start'};
>         my $val;
>  
>         $val = $as <=> $bs;
> -
> -       die if $val == 0;
> +       $val = $a cmp $b if $val == 0;
>  
>         return $val;
>  }
> @@ -497,9 +505,7 @@ sub get_engine_timeline {
>         return $engine_timelines{$ring} if exists $engine_timelines{$ring};
>  
>         @timeline = grep { $db{$_}->{'ring'} eq $ring } keys %db;
> -       # FIXME seqno restart
> -       @timeline = sort sortEngine @timeline;
> -
> +       @timeline = sort sortStart @timeline;
>         $engine_timelines{$ring} = \@timeline;
>  
>         return \@timeline;
> @@ -561,20 +567,10 @@ foreach my $gid (sort keys %rings) {
>                         $db{$key}->{'no-notify'} = 1;
>                 }
>                 $db{$key}->{'end'} = $end;
> +               $db{$key}->{'notify'} = $end if $db{$key}->{'notify'} > $end;
>         }
>  }
>  
> -sub sortStart {
> -       my $as = $db{$a}->{'start'};
> -       my $bs = $db{$b}->{'start'};
> -       my $val;
> -
> -       $val = $as <=> $bs;
> -       $val = $a cmp $b if $val == 0;
> -
> -       return $val;
> -}
> -
>  my $re_sort = 1;
>  my @sorted_keys;
>  
> @@ -670,9 +666,13 @@ if ($correct_durations) {
>                         next unless exists $db{$key}->{'no-end'};
>                         last if $pos == $#{$timeline};
>  
> -                       # Shift following request to start after the current one
> +                       # Shift following request to start after the current
> +                       # one, but only if that wouldn't make it zero duration,
> +                       # which would indicate notify arrived after context
> +                       # complete.
>                         $next_key = ${$timeline}[$pos + 1];
> -                       if (exists $db{$key}->{'notify'}) {
> +                       if (exists $db{$key}->{'notify'} and
> +                           $db{$key}->{'notify'} < $db{$key}->{'end'}) {
>                                 $db{$next_key}->{'engine-start'} = $db{$next_key}->{'start'};
>                                 $db{$next_key}->{'start'} = $db{$key}->{'notify'};
>                                 $re_sort = 1;
> @@ -750,9 +750,9 @@ foreach my $gid (sort keys %rings) {
>         # Extract all GPU busy intervals and sort them.
>         foreach my $key (@sorted_keys) {
>                 next unless $db{$key}->{'ring'} eq $ring;
> +               die if $db{$key}->{'start'} > $db{$key}->{'end'};

Heh, we're out of luck if we want to trace across seqno wraparound.

It makes enough sense,
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
-Chris
Tvrtko Ursulin May 13, 2019, 12:16 p.m. UTC | #4
On 10/05/2019 13:33, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2019-05-08 13:10:38)
>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>
>> After the removal of engine global seqnos and the corresponding
>> intel_engine_notify tracepoints the script needs to be adjusted to cope
>> with the new state of things.
>>
>> To keep working it switches over using the dma_fence:dma_fence_signaled:
>> tracepoint and keeps one extra internal map to connect the ctx-seqno pairs
>> with engines.
>>
>> It also needs to key the completion events on the full engine/ctx/seqno
>> tokens, and adjust correspondingly the timeline sorting logic.
>>
>> v2:
>>   * Do not use late notifications (received after context complete) when
>>     splitting up coalesced requests. They are now much more likely and can
>>     not be used.
>>
>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>> ---
>>   scripts/trace.pl | 82 ++++++++++++++++++++++++------------------------
>>   1 file changed, 41 insertions(+), 41 deletions(-)
>>
>> diff --git a/scripts/trace.pl b/scripts/trace.pl
>> index 18f9f3b18396..95dc3a645e8e 100755
>> --- a/scripts/trace.pl
>> +++ b/scripts/trace.pl
>> @@ -27,7 +27,8 @@ use warnings;
>>   use 5.010;
>>   
>>   my $gid = 0;
>> -my (%db, %queue, %submit, %notify, %rings, %ctxdb, %ringmap, %reqwait, %ctxtimelines);
>> +my (%db, %queue, %submit, %notify, %rings, %ctxdb, %ringmap, %reqwait,
>> +    %ctxtimelines, %ctxengines);
>>   my @freqs;
> 
> So what's ctxengines? Or rings for that matter?

rings go back to the beginnings of the tool when I think the 
visualizaiton library needed an unique integer value for every timeline 
(so engine). And there is a ringmap from this id back to our engine 
name. Perhaps this would be clearer if reversed, but I am not sure how 
much churn would that be without actually doing it. Renaming rings to 
engines would also make sense.

> I take it ctxengines is really the last engine which we saw this context
> execute on?

Correct.

I guess there is a problem if dma_fence_signaled is delayed past another 
request_in. Hm but I also have a die if engine is different.. that 
cannot be right, but why it didn't fail.. I need to double check this.

> 
>>   
>>   my $max_items = 3000;
>> @@ -66,7 +67,7 @@ Notes:
>>                                 i915:i915_request_submit, \
>>                                 i915:i915_request_in, \
>>                                 i915:i915_request_out, \
>> -                              i915:intel_engine_notify, \
>> +                              dma_fence:dma_fence_signaled, \
>>                                 i915:i915_request_wait_begin, \
>>                                 i915:i915_request_wait_end \
>>                                 [command-to-be-profiled]
>> @@ -161,7 +162,7 @@ sub arg_trace
>>                         'i915:i915_request_submit',
>>                         'i915:i915_request_in',
>>                         'i915:i915_request_out',
>> -                      'i915:intel_engine_notify',
>> +                      'dma_fence:dma_fence_signaled',
>>                         'i915:i915_request_wait_begin',
>>                         'i915:i915_request_wait_end' );
>>   
>> @@ -312,13 +313,6 @@ sub db_key
>>          return $ring . '/' . $ctx . '/' . $seqno;
>>   }
>>   
>> -sub global_key
>> -{
>> -       my ($ring, $seqno) = @_;
>> -
>> -       return $ring . '/' . $seqno;
>> -}
>> -
>>   sub sanitize_ctx
>>   {
>>          my ($ctx, $ring) = @_;
>> @@ -419,6 +413,8 @@ while (<>) {
>>                  $req{'ring'} = $ring;
>>                  $req{'seqno'} = $seqno;
>>                  $req{'ctx'} = $ctx;
>> +               die if exists $ctxengines{$ctx} and $ctxengines{$ctx} ne $ring;
>> +               $ctxengines{$ctx} = $ring;
>>                  $ctxtimelines{$ctx . '/' . $ring} = 1;
>>                  $req{'name'} = $ctx . '/' . $seqno;
>>                  $req{'global'} = $tp{'global'};
>> @@ -429,16 +425,29 @@ while (<>) {
>>                  $ringmap{$rings{$ring}} = $ring;
>>                  $db{$key} = \%req;
>>          } elsif ($tp_name eq 'i915:i915_request_out:') {
>> -               my $gkey = global_key($ring, $tp{'global'});
>> +               my $gkey;
>> +
> 
> # Must be paired with a previous i915_request_in
>> +               die unless exists $ctxengines{$ctx};
> 
> I'd suggest next unless, because there's always a change the capture is
> started part way though someone's workload.

That would need much more work.

> 
>> +               $gkey = db_key($ctxengines{$ctx}, $ctx, $seqno);
>> +
>> +               if ($tp{'completed?'}) {
>> +                       die unless exists $db{$key};
>> +                       die unless exists $db{$key}->{'start'};
>> +                       die if exists $db{$key}->{'end'};
>> +
>> +                       $db{$key}->{'end'} = $time;
>> +                       $db{$key}->{'notify'} = $notify{$gkey}
>> +                                               if exists $notify{$gkey};
> 
> Hmm. With preempt-to-busy, a request can complete when we are no longer
> tracking it (it completes before we preempt it).
> 
> They will still get the schedule-out tracepoint, but marked as
> incomplete, and there will be a signaled tp later before we try and
> resubmit.

This sounds like the request would disappear from the scripts view.

> 
>> +               } else {
>> +                       delete $db{$key};
>> +               }
>> +       } elsif ($tp_name eq 'dma_fence:dma_fence_signaled:') {
>> +               my $gkey;
>>   
>> -               die unless exists $db{$key};
>> -               die unless exists $db{$key}->{'start'};
>> -               die if exists $db{$key}->{'end'};
>> +               die unless exists $ctxengines{$tp{'context'}};
>>   
>> -               $db{$key}->{'end'} = $time;
>> -               $db{$key}->{'notify'} = $notify{$gkey} if exists $notify{$gkey};
>> -       } elsif ($tp_name eq 'i915:intel_engine_notify:') {
>> -               my $gkey = global_key($ring, $seqno);
>> +               $gkey = db_key($ctxengines{$tp{'context'}}, $tp{'context'}, $tp{'seqno'});
>>   
>>                  $notify{$gkey} = $time unless exists $notify{$gkey};
>>          } elsif ($tp_name eq 'i915:intel_gpu_freq_change:') {
>> @@ -452,7 +461,7 @@ while (<>) {
>>   # find the largest seqno to be used for timeline sorting purposes.
>>   my $max_seqno = 0;
>>   foreach my $key (keys %db) {
>> -       my $gkey = global_key($db{$key}->{'ring'}, $db{$key}->{'global'});
>> +       my $gkey = db_key($db{$key}->{'ring'}, $db{$key}->{'ctx'}, $db{$key}->{'seqno'});
>>   
>>          die unless exists $db{$key}->{'start'};
>>   
>> @@ -478,14 +487,13 @@ my $key_count = scalar(keys %db);
>>   
>>   my %engine_timelines;
>>   
>> -sub sortEngine {
>> -       my $as = $db{$a}->{'global'};
>> -       my $bs = $db{$b}->{'global'};
>> +sub sortStart {
>> +       my $as = $db{$a}->{'start'};
>> +       my $bs = $db{$b}->{'start'};
>>          my $val;
>>   
>>          $val = $as <=> $bs;
>> -
>> -       die if $val == 0;
>> +       $val = $a cmp $b if $val == 0;
>>   
>>          return $val;
>>   }
>> @@ -497,9 +505,7 @@ sub get_engine_timeline {
>>          return $engine_timelines{$ring} if exists $engine_timelines{$ring};
>>   
>>          @timeline = grep { $db{$_}->{'ring'} eq $ring } keys %db;
>> -       # FIXME seqno restart
>> -       @timeline = sort sortEngine @timeline;
>> -
>> +       @timeline = sort sortStart @timeline;
>>          $engine_timelines{$ring} = \@timeline;
>>   
>>          return \@timeline;
>> @@ -561,20 +567,10 @@ foreach my $gid (sort keys %rings) {
>>                          $db{$key}->{'no-notify'} = 1;
>>                  }
>>                  $db{$key}->{'end'} = $end;
>> +               $db{$key}->{'notify'} = $end if $db{$key}->{'notify'} > $end;
>>          }
>>   }
>>   
>> -sub sortStart {
>> -       my $as = $db{$a}->{'start'};
>> -       my $bs = $db{$b}->{'start'};
>> -       my $val;
>> -
>> -       $val = $as <=> $bs;
>> -       $val = $a cmp $b if $val == 0;
>> -
>> -       return $val;
>> -}
>> -
>>   my $re_sort = 1;
>>   my @sorted_keys;
>>   
>> @@ -670,9 +666,13 @@ if ($correct_durations) {
>>                          next unless exists $db{$key}->{'no-end'};
>>                          last if $pos == $#{$timeline};
>>   
>> -                       # Shift following request to start after the current one
>> +                       # Shift following request to start after the current
>> +                       # one, but only if that wouldn't make it zero duration,
>> +                       # which would indicate notify arrived after context
>> +                       # complete.
>>                          $next_key = ${$timeline}[$pos + 1];
>> -                       if (exists $db{$key}->{'notify'}) {
>> +                       if (exists $db{$key}->{'notify'} and
>> +                           $db{$key}->{'notify'} < $db{$key}->{'end'}) {
>>                                  $db{$next_key}->{'engine-start'} = $db{$next_key}->{'start'};
>>                                  $db{$next_key}->{'start'} = $db{$key}->{'notify'};
>>                                  $re_sort = 1;
>> @@ -750,9 +750,9 @@ foreach my $gid (sort keys %rings) {
>>          # Extract all GPU busy intervals and sort them.
>>          foreach my $key (@sorted_keys) {
>>                  next unless $db{$key}->{'ring'} eq $ring;
>> +               die if $db{$key}->{'start'} > $db{$key}->{'end'};
> 
> Heh, we're out of luck if we want to trace across seqno wraparound.

Yeah, that's another missing thing.

> 
> It makes enough sense,
> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>

Thanks. Overall the script could use a cleanup so I'll try to find some 
time towards it when this settles.

Regards,

Tvrtko
diff mbox series

Patch

diff --git a/scripts/trace.pl b/scripts/trace.pl
index 18f9f3b18396..95dc3a645e8e 100755
--- a/scripts/trace.pl
+++ b/scripts/trace.pl
@@ -27,7 +27,8 @@  use warnings;
 use 5.010;
 
 my $gid = 0;
-my (%db, %queue, %submit, %notify, %rings, %ctxdb, %ringmap, %reqwait, %ctxtimelines);
+my (%db, %queue, %submit, %notify, %rings, %ctxdb, %ringmap, %reqwait,
+    %ctxtimelines, %ctxengines);
 my @freqs;
 
 my $max_items = 3000;
@@ -66,7 +67,7 @@  Notes:
 			       i915:i915_request_submit, \
 			       i915:i915_request_in, \
 			       i915:i915_request_out, \
-			       i915:intel_engine_notify, \
+			       dma_fence:dma_fence_signaled, \
 			       i915:i915_request_wait_begin, \
 			       i915:i915_request_wait_end \
 			       [command-to-be-profiled]
@@ -161,7 +162,7 @@  sub arg_trace
 		       'i915:i915_request_submit',
 		       'i915:i915_request_in',
 		       'i915:i915_request_out',
-		       'i915:intel_engine_notify',
+		       'dma_fence:dma_fence_signaled',
 		       'i915:i915_request_wait_begin',
 		       'i915:i915_request_wait_end' );
 
@@ -312,13 +313,6 @@  sub db_key
 	return $ring . '/' . $ctx . '/' . $seqno;
 }
 
-sub global_key
-{
-	my ($ring, $seqno) = @_;
-
-	return $ring . '/' . $seqno;
-}
-
 sub sanitize_ctx
 {
 	my ($ctx, $ring) = @_;
@@ -419,6 +413,8 @@  while (<>) {
 		$req{'ring'} = $ring;
 		$req{'seqno'} = $seqno;
 		$req{'ctx'} = $ctx;
+		die if exists $ctxengines{$ctx} and $ctxengines{$ctx} ne $ring;
+		$ctxengines{$ctx} = $ring;
 		$ctxtimelines{$ctx . '/' . $ring} = 1;
 		$req{'name'} = $ctx . '/' . $seqno;
 		$req{'global'} = $tp{'global'};
@@ -429,16 +425,29 @@  while (<>) {
 		$ringmap{$rings{$ring}} = $ring;
 		$db{$key} = \%req;
 	} elsif ($tp_name eq 'i915:i915_request_out:') {
-		my $gkey = global_key($ring, $tp{'global'});
+		my $gkey;
+
+		die unless exists $ctxengines{$ctx};
+
+		$gkey = db_key($ctxengines{$ctx}, $ctx, $seqno);
+
+		if ($tp{'completed?'}) {
+			die unless exists $db{$key};
+			die unless exists $db{$key}->{'start'};
+			die if exists $db{$key}->{'end'};
+
+			$db{$key}->{'end'} = $time;
+			$db{$key}->{'notify'} = $notify{$gkey}
+						if exists $notify{$gkey};
+		} else {
+			delete $db{$key};
+		}
+	} elsif ($tp_name eq 'dma_fence:dma_fence_signaled:') {
+		my $gkey;
 
-		die unless exists $db{$key};
-		die unless exists $db{$key}->{'start'};
-		die if exists $db{$key}->{'end'};
+		die unless exists $ctxengines{$tp{'context'}};
 
-		$db{$key}->{'end'} = $time;
-		$db{$key}->{'notify'} = $notify{$gkey} if exists $notify{$gkey};
-	} elsif ($tp_name eq 'i915:intel_engine_notify:') {
-		my $gkey = global_key($ring, $seqno);
+		$gkey = db_key($ctxengines{$tp{'context'}}, $tp{'context'}, $tp{'seqno'});
 
 		$notify{$gkey} = $time unless exists $notify{$gkey};
 	} elsif ($tp_name eq 'i915:intel_gpu_freq_change:') {
@@ -452,7 +461,7 @@  while (<>) {
 # find the largest seqno to be used for timeline sorting purposes.
 my $max_seqno = 0;
 foreach my $key (keys %db) {
-	my $gkey = global_key($db{$key}->{'ring'}, $db{$key}->{'global'});
+	my $gkey = db_key($db{$key}->{'ring'}, $db{$key}->{'ctx'}, $db{$key}->{'seqno'});
 
 	die unless exists $db{$key}->{'start'};
 
@@ -478,14 +487,13 @@  my $key_count = scalar(keys %db);
 
 my %engine_timelines;
 
-sub sortEngine {
-	my $as = $db{$a}->{'global'};
-	my $bs = $db{$b}->{'global'};
+sub sortStart {
+	my $as = $db{$a}->{'start'};
+	my $bs = $db{$b}->{'start'};
 	my $val;
 
 	$val = $as <=> $bs;
-
-	die if $val == 0;
+	$val = $a cmp $b if $val == 0;
 
 	return $val;
 }
@@ -497,9 +505,7 @@  sub get_engine_timeline {
 	return $engine_timelines{$ring} if exists $engine_timelines{$ring};
 
 	@timeline = grep { $db{$_}->{'ring'} eq $ring } keys %db;
-	# FIXME seqno restart
-	@timeline = sort sortEngine @timeline;
-
+	@timeline = sort sortStart @timeline;
 	$engine_timelines{$ring} = \@timeline;
 
 	return \@timeline;
@@ -561,20 +567,10 @@  foreach my $gid (sort keys %rings) {
 			$db{$key}->{'no-notify'} = 1;
 		}
 		$db{$key}->{'end'} = $end;
+		$db{$key}->{'notify'} = $end if $db{$key}->{'notify'} > $end;
 	}
 }
 
-sub sortStart {
-	my $as = $db{$a}->{'start'};
-	my $bs = $db{$b}->{'start'};
-	my $val;
-
-	$val = $as <=> $bs;
-	$val = $a cmp $b if $val == 0;
-
-	return $val;
-}
-
 my $re_sort = 1;
 my @sorted_keys;
 
@@ -670,9 +666,13 @@  if ($correct_durations) {
 			next unless exists $db{$key}->{'no-end'};
 			last if $pos == $#{$timeline};
 
-			# Shift following request to start after the current one
+			# Shift following request to start after the current
+			# one, but only if that wouldn't make it zero duration,
+			# which would indicate notify arrived after context
+			# complete.
 			$next_key = ${$timeline}[$pos + 1];
-			if (exists $db{$key}->{'notify'}) {
+			if (exists $db{$key}->{'notify'} and
+			    $db{$key}->{'notify'} < $db{$key}->{'end'}) {
 				$db{$next_key}->{'engine-start'} = $db{$next_key}->{'start'};
 				$db{$next_key}->{'start'} = $db{$key}->{'notify'};
 				$re_sort = 1;
@@ -750,9 +750,9 @@  foreach my $gid (sort keys %rings) {
 	# Extract all GPU busy intervals and sort them.
 	foreach my $key (@sorted_keys) {
 		next unless $db{$key}->{'ring'} eq $ring;
+		die if $db{$key}->{'start'} > $db{$key}->{'end'};
 		push @s_, $db{$key}->{'start'};
 		push @e_, $db{$key}->{'end'};
-		die if $db{$key}->{'start'} > $db{$key}->{'end'};
 	}
 
 	die unless $#s_ == $#e_;