diff mbox

[OSSTEST,2/3] mg-bisect-activity-*: New rather ad-hoc scripts for wall clock timing analysys

Message ID 1474908154-25908-2-git-send-email-ian.jackson@eu.citrix.com (mailing list archive)
State New, archived
Headers show

Commit Message

Ian Jackson Sept. 26, 2016, 4:42 p.m. UTC
Signed-off-by: Ian Jackson <ian.jackson@eu.citrix.com>
---
 .gitignore                        |   2 +
 README.bisect-wallclock-analysis  |  85 ++++++++++++++++++
 mg-bisect-activity-collect-emails |  55 ++++++++++++
 mg-bisect-activity-collect-tmpdir |  94 ++++++++++++++++++++
 mg-bisect-activity-create-db      |  17 ++++
 mg-bisect-activity-dump-periods   | 181 ++++++++++++++++++++++++++++++++++++++
 6 files changed, 434 insertions(+)
 create mode 100644 README.bisect-wallclock-analysis
 create mode 100755 mg-bisect-activity-collect-emails
 create mode 100755 mg-bisect-activity-collect-tmpdir
 create mode 100755 mg-bisect-activity-create-db
 create mode 100755 mg-bisect-activity-dump-periods
diff mbox

Patch

diff --git a/.gitignore b/.gitignore
index 69f7743..425506b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -33,3 +33,5 @@  images
 diskusage-[A-Z]*.html
 diskusage-[A-Z]*.png
 diskusage-[A-Z]*.pdf
+bisect-timing.db
+*.tmp
diff --git a/README.bisect-wallclock-analysis b/README.bisect-wallclock-analysis
new file mode 100644
index 0000000..856ee2c
--- /dev/null
+++ b/README.bisect-wallclock-analysis
@@ -0,0 +1,85 @@ 
+General approach
+----------------
+
+ * Decide what branch to analyise.
+
+ * Save all the relevant emails generated by osstest (including the
+   main flight reports and the bisection reports) to an mbox called
+   eg bisect-data/report-emails
+
+ * Make a copy (with timestamps!) the tmp/ directory of the relevant
+   tree on the controller, eg in bisect-data/tmp with something like
+ rsync -aH osstest.test-lab:/home/osstest/bisects/for-xen-4.5-testing.git/tmp  bisect-data/.
+
+ * Run
+     ./mg-bisect-activity-create-db
+     ./mg-bisect-activity-collect-emails <bisect-data/report-emails
+     ./mg-bisect-activity-collect-tmpdir bisect-data/tmp
+   This will result in a sqlite3 datbase called bisect-timing.db
+
+ * Run ./mg-bisect-activity-dump-periods
+   and deal with any complaints "missing" and "cyclic" (see below).
+
+ * Decide what bisection to look at and find in the saved mbox:
+    A  The main report flight
+    B  The last subsequent email relating to any previous bisection
+        attempt on the same branch (ie of a different job)
+    C  The final bisection email for the bisection of interest6
+
+   Convert the Date: headers to time_ts with
+      TZ=UTC date +%s -d 'Tue, 20 Sep 2016 00:13:21 GMT'
+   or whatever.
+
+   Then run some or all of 
+      ./mg-bisect-activity-dump-periods A B
+      ./mg-bisect-activity-dump-periods B C
+      ./mg-bisect-activity-dump-periods A C
+   as you like.  (If there is no C, leave it off.)
+
+
+Event ordering
+--------------
+
+mg-bisect-activity-dump-periods produces a report which identifies
+"periods" in the data, classifies those "periods", and adds up the
+elapsed time for each class.
+
+A period is the time between two events.  An event is something
+evident from the mbox or the tmp/ directory.  Events are shown by
+mg-bisect-activity-dump-periods as "some - series - of - words".
+
+They do not contain " | ": this is used as a separtor.
+
+The raw data collection is done only to second resolution, so often
+the data contains events which are "simultaneous".
+
+For events A and B both occurring at timestamp T
+mg-bisect-activity-dump-periods looks at the data to see if it can
+figure out what order those events (A and B) "usually" come in.
+
+It will report "missing" for two events if that can't be determined.
+It will report "cyclic" if the data seems to suggest a cycle (ie two
+or more events on which relation "data suggests P occurs before Q"
+forms a cyclic graph).
+
+In each case the answer is to add to the __DATA__ section of
+mg-bisect-activity-dump-periods a line specifying "X | Y" where X is
+an event which you would expect (from your knowledge of the system)
+would generally precede Y.  This will be used both to remove incorrect
+edges (sorting out cycles) and to add missing edges.
+
+If there is a whole category of such, consider whether to do something
+programmating (like "m/^flight - step start ..." which is there
+already).
+
+
+Period squashing
+----------------
+
+The final part of __DATA_ is a list of regexps which are to match
+periods.  They are to match on
+
+  event | event
+
+and whenever one matches, the matching period is replaced by the text
+of the regexp.  This allows boringly-similar periods to be aggregated.
diff --git a/mg-bisect-activity-collect-emails b/mg-bisect-activity-collect-emails
new file mode 100755
index 0000000..e459aa8
--- /dev/null
+++ b/mg-bisect-activity-collect-emails
@@ -0,0 +1,55 @@ 
+#!/bin/bash
+
+set -e
+set -o pipefail
+set -o posix
+
+# usage: ./mg-bisect-activity-collect-emails <MBOX 'FROMPATTERN'
+
+OSSTEST_BACEF="$1" formail -c -s perl -we '
+	use strict;
+	our %h;
+	while (<>) {
+		chomp;
+		last if m/^$/;
+		if (m/^([^:]+)\:\s/) {
+	        	$h{lc $1} = $'\'';
+		}
+	}
+	while (<>) { }
+	die unless $h{subject};
+	my $date = $h{"x-original-date"} || $h{date};
+	die unless $date;
+	exit 0 unless $h{from} =~ m/$ENV{OSSTEST_BACEF}/o;
+	$_ = $h{subject} ;
+	s/\s+/ /g;
+	if (m/^\[[^][]+ bisection\](?: \d+\:)? (\w+) /) {
+		print "$1";
+	} elsif (m/^\[[^][ ]+ test\] \d+\: /) {
+		print "MAINREPORT";
+	} else {
+		chomp;
+		warn "$_\n";
+		exit 0;
+	}
+	print "\t";
+	flush STDOUT or die $!;
+	system(qw(date -d), $date, "+\%s") and die "$? $!";
+' | perl -we '
+	use strict;
+	use DBI;
+	#use Data::Dumper;
+	my $dbh = DBI->connect("dbi:SQLite:dbname=bisect-timing.db","","",
+			       { RaiseError => 1, PrintError => 1 });
+	$dbh->begin_work();
+	my $stmt = $dbh->prepare(
+		"INSERT INTO events (evt,ts,cat) VALUES (?,?,'\''email'\'')"
+	);
+	while (<>) {
+	      chomp or die;
+	      my @v = split /\t/;
+	      #print Dumper($_, \@v);
+	      $stmt->execute(@v);
+	}
+	$dbh->commit();
+'
diff --git a/mg-bisect-activity-collect-tmpdir b/mg-bisect-activity-collect-tmpdir
new file mode 100755
index 0000000..1ba92bc
--- /dev/null
+++ b/mg-bisect-activity-collect-tmpdir
@@ -0,0 +1,94 @@ 
+#!/usr/bin/perl -w
+#
+# usage: ./mg-bisect-activity-collect-tmpdir PATH/TO/tmp
+
+use strict;
+use DBI;
+
+my $dbh = DBI->connect("dbi:SQLite:dbname=bisect-timing.db","","",
+		       { RaiseError => 1, PrintError => 1 });
+
+$dbh->begin_work();
+
+my $stmt = $dbh->prepare("INSERT INTO events (ts,cat,evt) VALUES (?,?,?)");
+
+my ($inputdir) = @ARGV;
+
+sub date2ts ($) {
+    my ($date) = @_;
+    open S, "-|", qw(date -d), $date, qw(+%s) or die $!;
+    my $ts = <S>;
+    close S;
+    chomp $ts or die "$? $!";
+    $ts;
+}
+
+foreach my $f (<$inputdir/*>) {
+    $_ = $f; s#.*/##;
+
+    my $mtime = sub {
+	my ($evt,$cat) = @_;
+
+	lstat $f or die $!;
+	die unless -f _;
+	my $ts = ((stat _)[9]);
+
+	$stmt->execute($ts, ($cat // 'mtime'), $evt);
+    };
+
+    my @ec;
+    if (m/^t\./) {
+    } elsif (m/^\d+\.
+	       ( transcript
+	       | bisection-summary
+	       | bisection-report
+	       | email\.sent
+	       )$/x) {
+	$mtime->($1);
+    } elsif (m/^bisection-start-stamp$/) {
+	$mtime->($&, 'unique');
+    } elsif (m/^bisection-start-stamp\.new$/) {
+    } elsif (m/^cr-for-branches.log[-.0-9a-z]*$/) {
+	if (m/\.gz$/) {
+	    open L, "zcat $f |" or die $!;
+	} else {
+	    open L, "<", $f or die $!;
+	}
+	my $flightfirst;
+	while (<L>) {
+	    if (m/^\[(\d\d\d\d\-\d\d\-\d\d \d\d:\d\d:\d\d UTC) \d+\] /) {
+		my $ts = date2ts($1);
+		if (m/\.\.\.$/) {
+		    $stmt->execute($ts, 'crlog', 'begin');
+		} elsif (m/ status=\d+\s*$/) {
+		    $stmt->execute($ts, 'crlog', 'end');
+		} else {
+		    $stmt->execute($ts, 'crlog', 'unknown');
+		}
+	    } elsif (m/^export OSSTEST_RESOURCE_WAITSTART=(\d+)$/) {
+		$stmt->execute($1, 'waitstart', 'export');
+	    } elsif (s/^(\d\d\d\d\-\d\d\-\d\d \d\d:\d\d:\d\d Z )//) {
+		my $ts = date2ts($1);
+		if (m/^flight \d+ / && !($flightfirst++)) {
+		    $stmt->execute($ts, 'flight', 'flight start');
+		} elsif (m/^flight \d+ spawning/) {
+		    $stmt->execute($ts, 'flight','job start');
+		} elsif (m/^flight \d+ reaped/) {
+		    $stmt->execute($ts, 'flight','job finish');
+		} elsif (m/^flight \d+ (ended|ending) finished\.$/) {
+		    $stmt->execute($ts, 'flight',"flight $1");
+		} elsif (m/^\[(\w+)[^][]+\] starting .* (\S+)$/) {
+		    $stmt->execute($ts, 'flight', "step start $1 $2");
+		} elsif (m/^\[(\w+)[^][]+\] finished/) {
+		    $stmt->execute($ts, 'flight', "step finish $1");
+		}
+	    }
+	}
+    } elsif (m/^bisected\./) {
+	$mtime->('bisected');
+    } else {
+	warn $f;
+    }
+}
+
+$dbh->commit();
diff --git a/mg-bisect-activity-create-db b/mg-bisect-activity-create-db
new file mode 100755
index 0000000..7e54f52
--- /dev/null
+++ b/mg-bisect-activity-create-db
@@ -0,0 +1,17 @@ 
+#!/bin/bash
+set -e
+set -o pipefail
+set -o posix
+
+# usage: ./mg-bisect-activity-create-db
+# overwrites bisect-timing.db unconditionally
+
+rm -f bisect-timing.db
+sqlite3 bisect-timing.db -batch </dev/null -cmd '
+	CREATE TABLE events (
+       	      ts INTEGER NOT NULL,
+	      cat TEXT NOT NULL,
+	      evt TEXT NOT NULL
+	);
+	CREATE INDEX events_ts_ix ON events (ts);
+'
diff --git a/mg-bisect-activity-dump-periods b/mg-bisect-activity-dump-periods
new file mode 100755
index 0000000..0c04720
--- /dev/null
+++ b/mg-bisect-activity-dump-periods
@@ -0,0 +1,181 @@ 
+#!/usr/bin/perl -w
+#
+# usage: ./mg-bisect-activity-collect-tmpdir PATH/TO/tmp
+
+use strict;
+use DBI;
+use Data::Dumper;
+use Graph::Directed;
+
+my ($begin,$end) = @ARGV;
+
+my $dbh = DBI->connect("dbi:SQLite:dbname=bisect-timing.db","","",
+		       { RaiseError => 1, PrintError => 1, ReadOnly => 1 });
+
+$dbh->begin_work();
+
+my $exclude_cond = <<END;
+       (
+          (cat = 'email' AND evt = 'MAINREPORT')
+       OR (cat = 'unique')
+       )
+END
+
+my $time_cond = '1=1';
+$time_cond .= " AND ts >= $begin" if defined $begin;
+$time_cond .= " AND ts <= $end"   if defined $end;
+$time_cond = "($time_cond)";
+
+my $tq = $dbh->prepare(<<END);
+    SELECT DISTINCT ts
+      FROM events
+     WHERE NOT $exclude_cond
+       AND $time_cond
+  ORDER BY ts ASC
+END
+
+my $eq = $dbh->prepare(<<END);
+    SELECT DISTINCT (cat || ' - ' || evt)
+      FROM events
+     WHERE ts = ?
+       AND NOT $exclude_cond
+  ORDER BY cat, evt
+END
+
+our %evts;
+our %thisorder;
+
+sub getevts ($) {
+    my ($ts) = @_;
+    $eq->execute($ts);
+    return map { $_->[0] } @{ $eq->fetchall_arrayref() };
+}
+
+sub evlk ($) { join ' | ', @{$_[0]} };
+
+$tq->execute();
+my $lastevts;
+while (my ($ts) = $tq->fetchrow_array()) {
+    my @evts = getevts($ts);
+
+    $evts{$_}++ foreach @evts;
+
+    if ($lastevts) {
+	if (@evts==1 && @$lastevts==1) {
+	    my @k = (@$lastevts, @evts);
+	    $thisorder{join "\t", @k}++;
+	}
+    }
+    $lastevts = \@evts;
+}
+
+sub forceorder ($$) {
+    my ($e,$f) = @_;
+    $thisorder{ join "\t", $e, $f }++;
+    delete $thisorder{ join "\t", $f, $e };
+}
+
+while (<DATA>) {
+    s/\s+$//;
+    next unless m/\S/;
+    last if m/^-/;
+    m/ \| / or die "$_ ?";
+    forceorder($`, $');
+}
+
+foreach (sort keys %evts) {
+    if (m/^flight - step start (\w+) /) {
+	forceorder($_, "flight - step finish $1");
+    }
+}
+
+our @squashes;
+
+while (<DATA>) {
+    s/\s+$//;
+    next unless m/\S/;
+    push @squashes, $_;
+}
+
+#print Dumper(\%thisorder);
+our %periods;
+
+$tq->execute();
+my $lastevt;
+my $lastts;
+while (my ($ts) = $tq->fetchrow_array()) {
+    my @evts = getevts($ts);
+    if (@evts > 1) {
+	my $g = new Graph::Directed;
+	foreach my $e (@evts) {
+	    $g->add_vertex($e);
+	    foreach my $f (@evts) {
+		next if $e eq $f;
+		next unless $thisorder{ join "\t", $e, $f };
+		$g->add_edge($e, $f);
+	    }
+	}
+	if ($g->is_cyclic) {
+	    my @cycle = $g->find_a_cycle;
+	    die "$ts cyclic ".(join " | ", @cycle);
+	}
+	$g = $g->transitive_closure;
+	my $exp_edges = (@evts/2 * (@evts-1));
+	if ($g->edges != $exp_edges) {
+	    foreach my $e (@evts) {
+		foreach my $f (@evts) {
+		    next if $e eq $f;
+		    next if $g->has_edge($e,$f);
+		    next if $g->has_edge($f,$e);
+		    die "$ts missing $e | $f";
+		}
+	    }
+	    die "$ts ".Dumper(\@evts, $g)." ?" if $@;
+	}
+	@evts = sort { $g->has_edge($a,$b) ? +1 : -1 } @evts;
+    }
+    foreach my $evt (@evts) {
+        $evt =~ s/^$_$/$_/ foreach (@squashes);
+	if (defined $lastevt) {
+	    $periods{"$lastevt | $evt"} += ($ts - $lastts);
+	}
+	$lastevt = $evt;
+	$lastts = $ts;
+    }
+}
+
+my $total = 0;
+$total += $_ foreach values %periods;
+
+sub prperiod () {
+    printf "%5.2f%% %10d %s\n", 100.0*$periods{$_}/$total, $periods{$_}, $_;
+}
+
+prperiod() foreach sort keys %periods;
+
+print "\n";
+
+prperiod() foreach sort { $periods{$b} <=> $periods{$a} } keys %periods;
+
+$dbh->rollback();
+
+# Two lots of stuff:
+#   - orderings of possibly-simultaenous events
+#     (see README.bisect-wallclock-analysis)
+#   - regexps for squashing periods; each entry in the list
+#     replaces all matching periods with a copy of the regexp
+__DATA__
+email - tested | email - testing
+mtime - email.sent | email - testing
+mtime - bisection-summary | mtime - bisection-report
+flight - flight ended | mtime - transcript
+flight - step finish build | flight - job finish
+flight - flight start | flight - job start
+mtime - bisection-report | mtime - bisected
+email - unreproducible | crlog - end
+email - complete | crlog - end
+--
+flight - step start build host-(?:install|build-prep)
+flight - step start build .*-build
+flight - step start test capture-logs.*
+flight - step start test (?!capture|host-install|hosts-allocate).*