diff mbox

[i-g-t,3/4] trace.pl: Fix incomplete request handling

Message ID 20180423095238.29238-4-tvrtko.ursulin@linux.intel.com (mailing list archive)
State New, archived
Headers show

Commit Message

Tvrtko Ursulin April 23, 2018, 9:52 a.m. UTC
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>

Incomplete requests (no notify, no context complete) have to be corrected
by looking at the engine timeline, and not the sorted-by-start-time view
as was previously used.

Per-engine timelines are generated on demand and cached for later use.

v2: Find end of current context on the engine timeline instead of just
    using the next request for adjusting the incomplete start time.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: John Harrison <John.C.Harrison@intel.com>
---
 scripts/trace.pl | 86 ++++++++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 65 insertions(+), 21 deletions(-)

Comments

John Harrison May 7, 2018, 11:58 p.m. UTC | #1
On 4/23/2018 2:52 AM, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>
> Incomplete requests (no notify, no context complete) have to be corrected
> by looking at the engine timeline, and not the sorted-by-start-time view
> as was previously used.
>
> Per-engine timelines are generated on demand and cached for later use.
>
> v2: Find end of current context on the engine timeline instead of just
>      using the next request for adjusting the incomplete start time.
>
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: John Harrison <John.C.Harrison@intel.com>
> ---
>   scripts/trace.pl | 86 ++++++++++++++++++++++++++++++++++++++++++--------------
>   1 file changed, 65 insertions(+), 21 deletions(-)
>
> diff --git a/scripts/trace.pl b/scripts/trace.pl
> index eb5a36b91a5c..b48f43225fc1 100755
> --- a/scripts/trace.pl
> +++ b/scripts/trace.pl
> @@ -26,6 +26,8 @@ use strict;
>   use warnings;
>   use 5.010;
>   
> +use List::Util;
> +
>   my $gid = 0;
>   my (%db, %queue, %submit, %notify, %rings, %ctxdb, %ringmap, %reqwait);
>   my @freqs;
> @@ -516,29 +518,71 @@ foreach my $key (keys %db) {
>   	}
>   }
>   
> -# Fix up incompletes
>   my $key_count = scalar(keys %db);
> -foreach my $key (keys %db) {
> -	next unless exists $db{$key}->{'incomplete'};
>   
> -	# End the incomplete batch at the time next one starts
> -	my $ring = $db{$key}->{'ring'};
> -	my $ctx = $db{$key}->{'ctx'};
> -	my $seqno = $db{$key}->{'seqno'};
> -	my $next_key;
> -	my $i = 1;
> +my %engine_timelines;
> +
> +sub sortEngine {
> +	my $as = $db{$a}->{'global'};
> +	my $bs = $db{$b}->{'global'};
> +	my $val;
> +
> +	$val = $as <=> $bs;
> +
> +	die if $val == 0;
> +
> +	return $val;
> +}
> +
> +sub get_engine_timeline {
> +	my ($ring) = @_;
> +	my @timeline;
> +
> +	return $engine_timelines{$ring} if exists $engine_timelines{$ring};
> +
> +	@timeline = grep { $db{$_}->{'ring'} == $ring } keys %db;
> +	# FIXME seqno restart
> +	@timeline = sort sortEngine @timeline;
> +
> +	$engine_timelines{$ring} = \@timeline;
> +
> +	return \@timeline;
> +}
> +
> +# Fix up incompletes by ending them when the last request of a coalesced group
> +# ends. Start by filtering out the incomplete requests.
> +my @incompletes = sort grep { exists $db{$_}->{'incomplete'} } keys %db;
> +
> +foreach my $key (@incompletes) {
> +	my $timeline;
> +	my $last_key;
>   	my $end;
> +	my $i;
>   
> -	do {
> -		$next_key = db_key($ring, $ctx, $seqno + $i);
> -		$i++;
> -	} until ((exists $db{$next_key} and not exists $db{$next_key}->{'incomplete'})
> -		 or $i > $key_count);  # ugly stop hack
> +	# Find the next request on the engine timeline.
> +	$timeline = get_engine_timeline($db{$key}->{'ring'});
> +	$i = List::Util::first { ${$timeline}[$_] eq $key } 0..$#{$timeline};
This line in particular massively slows the process down! With one of my 
longer trace files, the incomplete fix-up step took 30 seconds prior to 
this. After this patch, it took 11205 seconds! Just shy of four hours!

Caching the start point of the search greatly speeds things up but it is 
still a lot slower than previous. With the following, it is down to 4068 
seconds, or a little over one hour.
+my $lastIndex = 0;
...
-       $i = List::Util::first { ${$timeline}[$_] eq $key } 
0..$#{$timeline};
+       $i = List::Util::first { ${$timeline}[$_] eq $key } 
${lastIndex}..$#{$timeline};
+       if( !defined($i) ) {
+               $i = List::Util::first { ${$timeline}[$_] eq $key } 
0..$#{$timeline};
+               defined($i) || die "Failed to find '$key'!\n";
+       }
+       $lastIndex = $i;

If I sort the '@incompletes' array numerically by seqno rather than as a 
string sort, then I get rid of all the first level match failures 
(except when rolling from one ring to the next) and the time drops to 
1819 seconds or half an hour.

+sub sortKeys {
+       my( $aR, $aC, $aS ) = split( '/', $a );
+       my( $bR, $bC, $bS ) = split( '/', $b );
+       my $val;
+
+       $val = $aR <=> $bR;
+       return $val if $val != 0;
+
+       $val = $aC <=> $bC;
+       return $val if $val != 0;
+
+       return $aS <=> $bS;
+}
+

-my @incompletes = sort grep { exists $db{$_}->{'incomplete'} } keys %db;
+my @incompletes = sort sortKeys grep { exists $db{$_}->{'incomplete'} } 
keys %db;



> +
> +	while ($i < $#{$timeline}) {
> +		my $next;
> +
> +		$i = $i + 1;
$i++; ?

> +		$next = ${$timeline}[$i];
> +
> +		next if exists $db{$next}->{'incomplete'};
>   
> -	if (exists $db{$next_key}) {
> -		$end = $db{$next_key}->{'end'};
> +		$last_key = $next;
> +		last;
> +	}
> +
> +	if (defined $last_key) {
> +		if ($db{$key}->{'ctx'} eq $db{$last_key}->{'ctx'}) {
> +			$end = $db{$last_key}->{'end'};
> +		} else {
> +			$end = $db{$last_key}->{'start'};
> +		}
>   	} else {
> -		# No info at all, fake it:
> +		# No next submission, fake it.
>   		$end = $db{$key}->{'start'} + 999;
>   	}
>   
> @@ -546,10 +590,6 @@ foreach my $key (keys %db) {
>   	$db{$key}->{'end'} = $end;
>   }
>   
> -# GPU time accounting
> -my (%running, %runnable, %queued, %batch_avg, %batch_total_avg, %batch_count);
> -my (%submit_avg, %execute_avg, %ctxsave_avg);
> -
>   sub sortStart {
>   	my $as = $db{$a}->{'start'};
>   	my $bs = $db{$b}->{'start'};
> @@ -597,6 +637,10 @@ foreach my $key (@sorted_keys) {
>   
>   @sorted_keys = sort sortStart keys %db if $re_sort;
>   
> +# GPU time accounting
> +my (%running, %runnable, %queued, %batch_avg, %batch_total_avg, %batch_count);
> +my (%submit_avg, %execute_avg, %ctxsave_avg);
> +

This is just a prettification type change? Moving declarations to be 
more local to their usage? It doesn't seem to be related to the above 
changes or explicitly mentioned in the commit message.

>   my $last_ts = 0;
>   my $first_ts;
>   my ($min_ctx, $max_ctx);
Tvrtko Ursulin May 8, 2018, 12:07 p.m. UTC | #2
On 08/05/2018 00:58, John Harrison wrote:
> On 4/23/2018 2:52 AM, Tvrtko Ursulin wrote:
>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>
>> Incomplete requests (no notify, no context complete) have to be corrected
>> by looking at the engine timeline, and not the sorted-by-start-time view
>> as was previously used.
>>
>> Per-engine timelines are generated on demand and cached for later use.
>>
>> v2: Find end of current context on the engine timeline instead of just
>>      using the next request for adjusting the incomplete start time.
>>
>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>> Cc: John Harrison <John.C.Harrison@intel.com>
>> ---
>>   scripts/trace.pl | 86 
>> ++++++++++++++++++++++++++++++++++++++++++--------------
>>   1 file changed, 65 insertions(+), 21 deletions(-)
>>
>> diff --git a/scripts/trace.pl b/scripts/trace.pl
>> index eb5a36b91a5c..b48f43225fc1 100755
>> --- a/scripts/trace.pl
>> +++ b/scripts/trace.pl
>> @@ -26,6 +26,8 @@ use strict;
>>   use warnings;
>>   use 5.010;
>> +use List::Util;
>> +
>>   my $gid = 0;
>>   my (%db, %queue, %submit, %notify, %rings, %ctxdb, %ringmap, %reqwait);
>>   my @freqs;
>> @@ -516,29 +518,71 @@ foreach my $key (keys %db) {
>>       }
>>   }
>> -# Fix up incompletes
>>   my $key_count = scalar(keys %db);
>> -foreach my $key (keys %db) {
>> -    next unless exists $db{$key}->{'incomplete'};
>> -    # End the incomplete batch at the time next one starts
>> -    my $ring = $db{$key}->{'ring'};
>> -    my $ctx = $db{$key}->{'ctx'};
>> -    my $seqno = $db{$key}->{'seqno'};
>> -    my $next_key;
>> -    my $i = 1;
>> +my %engine_timelines;
>> +
>> +sub sortEngine {
>> +    my $as = $db{$a}->{'global'};
>> +    my $bs = $db{$b}->{'global'};
>> +    my $val;
>> +
>> +    $val = $as <=> $bs;
>> +
>> +    die if $val == 0;
>> +
>> +    return $val;
>> +}
>> +
>> +sub get_engine_timeline {
>> +    my ($ring) = @_;
>> +    my @timeline;
>> +
>> +    return $engine_timelines{$ring} if exists $engine_timelines{$ring};
>> +
>> +    @timeline = grep { $db{$_}->{'ring'} == $ring } keys %db;
>> +    # FIXME seqno restart
>> +    @timeline = sort sortEngine @timeline;
>> +
>> +    $engine_timelines{$ring} = \@timeline;
>> +
>> +    return \@timeline;
>> +}
>> +
>> +# Fix up incompletes by ending them when the last request of a 
>> coalesced group
>> +# ends. Start by filtering out the incomplete requests.
>> +my @incompletes = sort grep { exists $db{$_}->{'incomplete'} } keys %db;
>> +
>> +foreach my $key (@incompletes) {
>> +    my $timeline;
>> +    my $last_key;
>>       my $end;
>> +    my $i;
>> -    do {
>> -        $next_key = db_key($ring, $ctx, $seqno + $i);
>> -        $i++;
>> -    } until ((exists $db{$next_key} and not exists 
>> $db{$next_key}->{'incomplete'})
>> -         or $i > $key_count);  # ugly stop hack
>> +    # Find the next request on the engine timeline.
>> +    $timeline = get_engine_timeline($db{$key}->{'ring'});
>> +    $i = List::Util::first { ${$timeline}[$_] eq $key } 
>> 0..$#{$timeline};
> This line in particular massively slows the process down! With one of my 
> longer trace files, the incomplete fix-up step took 30 seconds prior to 
> this. After this patch, it took 11205 seconds! Just shy of four hours!

Gee, awful. :(

> Caching the start point of the search greatly speeds things up but it is 
> still a lot slower than previous. With the following, it is down to 4068 
> seconds, or a little over one hour.
> +my $lastIndex = 0;
> ...
> -       $i = List::Util::first { ${$timeline}[$_] eq $key } 
> 0..$#{$timeline};
> +       $i = List::Util::first { ${$timeline}[$_] eq $key } 
> ${lastIndex}..$#{$timeline};
> +       if( !defined($i) ) {
> +               $i = List::Util::first { ${$timeline}[$_] eq $key } 
> 0..$#{$timeline};
> +               defined($i) || die "Failed to find '$key'!\n";
> +       }
> +       $lastIndex = $i;

This definitely makes sense and I'll take it.

> If I sort the '@incompletes' array numerically by seqno rather than as a 
> string sort, then I get rid of all the first level match failures 
> (except when rolling from one ring to the next) and the time drops to 
> 1819 seconds or half an hour.
> 
> +sub sortKeys {
> +       my( $aR, $aC, $aS ) = split( '/', $a );
> +       my( $bR, $bC, $bS ) = split( '/', $b );
> +       my $val;
> +
> +       $val = $aR <=> $bR;
> +       return $val if $val != 0;
> +
> +       $val = $aC <=> $bC;
> +       return $val if $val != 0;
> +
> +       return $aS <=> $bS;
> +}

For here I am thinking to try processing incompletes per-ring instead. 
That should limit the walks and enable some more numerical comparisons.

I'll also try adding cachine to the loop after the List::Util::first 
lookup to avoid repeatedly looking for the next complete, in cases when 
there are a lot of bunched up incompletes.

> +
> 
> -my @incompletes = sort grep { exists $db{$_}->{'incomplete'} } keys %db;
> +my @incompletes = sort sortKeys grep { exists $db{$_}->{'incomplete'} } 
> keys %db;
> 
> 
> 
>> +
>> +    while ($i < $#{$timeline}) {
>> +        my $next;
>> +
>> +        $i = $i + 1;
> $i++; ?

Yeah.. there was a language which didn't support increments and my brain 
got confused that's Perl, when it is actually Python.. :)

> 
>> +        $next = ${$timeline}[$i];
>> +
>> +        next if exists $db{$next}->{'incomplete'};
>> -    if (exists $db{$next_key}) {
>> -        $end = $db{$next_key}->{'end'};
>> +        $last_key = $next;
>> +        last;
>> +    }
>> +
>> +    if (defined $last_key) {
>> +        if ($db{$key}->{'ctx'} eq $db{$last_key}->{'ctx'}) {
>> +            $end = $db{$last_key}->{'end'};
>> +        } else {
>> +            $end = $db{$last_key}->{'start'};
>> +        }
>>       } else {
>> -        # No info at all, fake it:
>> +        # No next submission, fake it.
>>           $end = $db{$key}->{'start'} + 999;
>>       }
>> @@ -546,10 +590,6 @@ foreach my $key (keys %db) {
>>       $db{$key}->{'end'} = $end;
>>   }
>> -# GPU time accounting
>> -my (%running, %runnable, %queued, %batch_avg, %batch_total_avg, 
>> %batch_count);
>> -my (%submit_avg, %execute_avg, %ctxsave_avg);
>> -
>>   sub sortStart {
>>       my $as = $db{$a}->{'start'};
>>       my $bs = $db{$b}->{'start'};
>> @@ -597,6 +637,10 @@ foreach my $key (@sorted_keys) {
>>   @sorted_keys = sort sortStart keys %db if $re_sort;
>> +# GPU time accounting
>> +my (%running, %runnable, %queued, %batch_avg, %batch_total_avg, 
>> %batch_count);
>> +my (%submit_avg, %execute_avg, %ctxsave_avg);
>> +
> 
> This is just a prettification type change? Moving declarations to be 
> more local to their usage? It doesn't seem to be related to the above 
> changes or explicitly mentioned in the commit message.

Yes, just moving closer to use site.

Regards,

Tvrtko
diff mbox

Patch

diff --git a/scripts/trace.pl b/scripts/trace.pl
index eb5a36b91a5c..b48f43225fc1 100755
--- a/scripts/trace.pl
+++ b/scripts/trace.pl
@@ -26,6 +26,8 @@  use strict;
 use warnings;
 use 5.010;
 
+use List::Util;
+
 my $gid = 0;
 my (%db, %queue, %submit, %notify, %rings, %ctxdb, %ringmap, %reqwait);
 my @freqs;
@@ -516,29 +518,71 @@  foreach my $key (keys %db) {
 	}
 }
 
-# Fix up incompletes
 my $key_count = scalar(keys %db);
-foreach my $key (keys %db) {
-	next unless exists $db{$key}->{'incomplete'};
 
-	# End the incomplete batch at the time next one starts
-	my $ring = $db{$key}->{'ring'};
-	my $ctx = $db{$key}->{'ctx'};
-	my $seqno = $db{$key}->{'seqno'};
-	my $next_key;
-	my $i = 1;
+my %engine_timelines;
+
+sub sortEngine {
+	my $as = $db{$a}->{'global'};
+	my $bs = $db{$b}->{'global'};
+	my $val;
+
+	$val = $as <=> $bs;
+
+	die if $val == 0;
+
+	return $val;
+}
+
+sub get_engine_timeline {
+	my ($ring) = @_;
+	my @timeline;
+
+	return $engine_timelines{$ring} if exists $engine_timelines{$ring};
+
+	@timeline = grep { $db{$_}->{'ring'} == $ring } keys %db;
+	# FIXME seqno restart
+	@timeline = sort sortEngine @timeline;
+
+	$engine_timelines{$ring} = \@timeline;
+
+	return \@timeline;
+}
+
+# Fix up incompletes by ending them when the last request of a coalesced group
+# ends. Start by filtering out the incomplete requests.
+my @incompletes = sort grep { exists $db{$_}->{'incomplete'} } keys %db;
+
+foreach my $key (@incompletes) {
+	my $timeline;
+	my $last_key;
 	my $end;
+	my $i;
 
-	do {
-		$next_key = db_key($ring, $ctx, $seqno + $i);
-		$i++;
-	} until ((exists $db{$next_key} and not exists $db{$next_key}->{'incomplete'})
-		 or $i > $key_count);  # ugly stop hack
+	# Find the next request on the engine timeline.
+	$timeline = get_engine_timeline($db{$key}->{'ring'});
+	$i = List::Util::first { ${$timeline}[$_] eq $key } 0..$#{$timeline};
+
+	while ($i < $#{$timeline}) {
+		my $next;
+
+		$i = $i + 1;
+		$next = ${$timeline}[$i];
+
+		next if exists $db{$next}->{'incomplete'};
 
-	if (exists $db{$next_key}) {
-		$end = $db{$next_key}->{'end'};
+		$last_key = $next;
+		last;
+	}
+
+	if (defined $last_key) {
+		if ($db{$key}->{'ctx'} eq $db{$last_key}->{'ctx'}) {
+			$end = $db{$last_key}->{'end'};
+		} else {
+			$end = $db{$last_key}->{'start'};
+		}
 	} else {
-		# No info at all, fake it:
+		# No next submission, fake it.
 		$end = $db{$key}->{'start'} + 999;
 	}
 
@@ -546,10 +590,6 @@  foreach my $key (keys %db) {
 	$db{$key}->{'end'} = $end;
 }
 
-# GPU time accounting
-my (%running, %runnable, %queued, %batch_avg, %batch_total_avg, %batch_count);
-my (%submit_avg, %execute_avg, %ctxsave_avg);
-
 sub sortStart {
 	my $as = $db{$a}->{'start'};
 	my $bs = $db{$b}->{'start'};
@@ -597,6 +637,10 @@  foreach my $key (@sorted_keys) {
 
 @sorted_keys = sort sortStart keys %db if $re_sort;
 
+# GPU time accounting
+my (%running, %runnable, %queued, %batch_avg, %batch_total_avg, %batch_count);
+my (%submit_avg, %execute_avg, %ctxsave_avg);
+
 my $last_ts = 0;
 my $first_ts;
 my ($min_ctx, $max_ctx);