@@ -33,3 +33,5 @@ images
diskusage-[A-Z]*.html
diskusage-[A-Z]*.png
diskusage-[A-Z]*.pdf
+bisect-timing.db
+*.tmp
new file mode 100644
@@ -0,0 +1,85 @@
+General approach
+----------------
+
+ * Decide what branch to analyise.
+
+ * Save all the relevant emails generated by osstest (including the
+ main flight reports and the bisection reports) to an mbox called
+ eg bisect-data/report-emails
+
+ * Make a copy (with timestamps!) the tmp/ directory of the relevant
+ tree on the controller, eg in bisect-data/tmp with something like
+ rsync -aH osstest.test-lab:/home/osstest/bisects/for-xen-4.5-testing.git/tmp bisect-data/.
+
+ * Run
+ ./mg-bisect-activity-create-db
+ ./mg-bisect-activity-collect-emails <bisect-data/report-emails
+ ./mg-bisect-activity-collect-tmpdir bisect-data/tmp
+ This will result in a sqlite3 datbase called bisect-timing.db
+
+ * Run ./mg-bisect-activity-dump-periods
+ and deal with any complaints "missing" and "cyclic" (see below).
+
+ * Decide what bisection to look at and find in the saved mbox:
+ A The main report flight
+ B The last subsequent email relating to any previous bisection
+ attempt on the same branch (ie of a different job)
+ C The final bisection email for the bisection of interest6
+
+ Convert the Date: headers to time_ts with
+ TZ=UTC date +%s -d 'Tue, 20 Sep 2016 00:13:21 GMT'
+ or whatever.
+
+ Then run some or all of
+ ./mg-bisect-activity-dump-periods A B
+ ./mg-bisect-activity-dump-periods B C
+ ./mg-bisect-activity-dump-periods A C
+ as you like. (If there is no C, leave it off.)
+
+
+Event ordering
+--------------
+
+mg-bisect-activity-dump-periods produces a report which identifies
+"periods" in the data, classifies those "periods", and adds up the
+elapsed time for each class.
+
+A period is the time between two events. An event is something
+evident from the mbox or the tmp/ directory. Events are shown by
+mg-bisect-activity-dump-periods as "some - series - of - words".
+
+They do not contain " | ": this is used as a separtor.
+
+The raw data collection is done only to second resolution, so often
+the data contains events which are "simultaneous".
+
+For events A and B both occurring at timestamp T
+mg-bisect-activity-dump-periods looks at the data to see if it can
+figure out what order those events (A and B) "usually" come in.
+
+It will report "missing" for two events if that can't be determined.
+It will report "cyclic" if the data seems to suggest a cycle (ie two
+or more events on which relation "data suggests P occurs before Q"
+forms a cyclic graph).
+
+In each case the answer is to add to the __DATA__ section of
+mg-bisect-activity-dump-periods a line specifying "X | Y" where X is
+an event which you would expect (from your knowledge of the system)
+would generally precede Y. This will be used both to remove incorrect
+edges (sorting out cycles) and to add missing edges.
+
+If there is a whole category of such, consider whether to do something
+programmating (like "m/^flight - step start ..." which is there
+already).
+
+
+Period squashing
+----------------
+
+The final part of __DATA_ is a list of regexps which are to match
+periods. They are to match on
+
+ event | event
+
+and whenever one matches, the matching period is replaced by the text
+of the regexp. This allows boringly-similar periods to be aggregated.
new file mode 100755
@@ -0,0 +1,55 @@
+#!/bin/bash
+
+set -e
+set -o pipefail
+set -o posix
+
+# usage: ./mg-bisect-activity-collect-emails <MBOX 'FROMPATTERN'
+
+OSSTEST_BACEF="$1" formail -c -s perl -we '
+ use strict;
+ our %h;
+ while (<>) {
+ chomp;
+ last if m/^$/;
+ if (m/^([^:]+)\:\s/) {
+ $h{lc $1} = $'\'';
+ }
+ }
+ while (<>) { }
+ die unless $h{subject};
+ my $date = $h{"x-original-date"} || $h{date};
+ die unless $date;
+ exit 0 unless $h{from} =~ m/$ENV{OSSTEST_BACEF}/o;
+ $_ = $h{subject} ;
+ s/\s+/ /g;
+ if (m/^\[[^][]+ bisection\](?: \d+\:)? (\w+) /) {
+ print "$1";
+ } elsif (m/^\[[^][ ]+ test\] \d+\: /) {
+ print "MAINREPORT";
+ } else {
+ chomp;
+ warn "$_\n";
+ exit 0;
+ }
+ print "\t";
+ flush STDOUT or die $!;
+ system(qw(date -d), $date, "+\%s") and die "$? $!";
+' | perl -we '
+ use strict;
+ use DBI;
+ #use Data::Dumper;
+ my $dbh = DBI->connect("dbi:SQLite:dbname=bisect-timing.db","","",
+ { RaiseError => 1, PrintError => 1 });
+ $dbh->begin_work();
+ my $stmt = $dbh->prepare(
+ "INSERT INTO events (evt,ts,cat) VALUES (?,?,'\''email'\'')"
+ );
+ while (<>) {
+ chomp or die;
+ my @v = split /\t/;
+ #print Dumper($_, \@v);
+ $stmt->execute(@v);
+ }
+ $dbh->commit();
+'
new file mode 100755
@@ -0,0 +1,94 @@
+#!/usr/bin/perl -w
+#
+# usage: ./mg-bisect-activity-collect-tmpdir PATH/TO/tmp
+
+use strict;
+use DBI;
+
+my $dbh = DBI->connect("dbi:SQLite:dbname=bisect-timing.db","","",
+ { RaiseError => 1, PrintError => 1 });
+
+$dbh->begin_work();
+
+my $stmt = $dbh->prepare("INSERT INTO events (ts,cat,evt) VALUES (?,?,?)");
+
+my ($inputdir) = @ARGV;
+
+sub date2ts ($) {
+ my ($date) = @_;
+ open S, "-|", qw(date -d), $date, qw(+%s) or die $!;
+ my $ts = <S>;
+ close S;
+ chomp $ts or die "$? $!";
+ $ts;
+}
+
+foreach my $f (<$inputdir/*>) {
+ $_ = $f; s#.*/##;
+
+ my $mtime = sub {
+ my ($evt,$cat) = @_;
+
+ lstat $f or die $!;
+ die unless -f _;
+ my $ts = ((stat _)[9]);
+
+ $stmt->execute($ts, ($cat // 'mtime'), $evt);
+ };
+
+ my @ec;
+ if (m/^t\./) {
+ } elsif (m/^\d+\.
+ ( transcript
+ | bisection-summary
+ | bisection-report
+ | email\.sent
+ )$/x) {
+ $mtime->($1);
+ } elsif (m/^bisection-start-stamp$/) {
+ $mtime->($&, 'unique');
+ } elsif (m/^bisection-start-stamp\.new$/) {
+ } elsif (m/^cr-for-branches.log[-.0-9a-z]*$/) {
+ if (m/\.gz$/) {
+ open L, "zcat $f |" or die $!;
+ } else {
+ open L, "<", $f or die $!;
+ }
+ my $flightfirst;
+ while (<L>) {
+ if (m/^\[(\d\d\d\d\-\d\d\-\d\d \d\d:\d\d:\d\d UTC) \d+\] /) {
+ my $ts = date2ts($1);
+ if (m/\.\.\.$/) {
+ $stmt->execute($ts, 'crlog', 'begin');
+ } elsif (m/ status=\d+\s*$/) {
+ $stmt->execute($ts, 'crlog', 'end');
+ } else {
+ $stmt->execute($ts, 'crlog', 'unknown');
+ }
+ } elsif (m/^export OSSTEST_RESOURCE_WAITSTART=(\d+)$/) {
+ $stmt->execute($1, 'waitstart', 'export');
+ } elsif (s/^(\d\d\d\d\-\d\d\-\d\d \d\d:\d\d:\d\d Z )//) {
+ my $ts = date2ts($1);
+ if (m/^flight \d+ / && !($flightfirst++)) {
+ $stmt->execute($ts, 'flight', 'flight start');
+ } elsif (m/^flight \d+ spawning/) {
+ $stmt->execute($ts, 'flight','job start');
+ } elsif (m/^flight \d+ reaped/) {
+ $stmt->execute($ts, 'flight','job finish');
+ } elsif (m/^flight \d+ (ended|ending) finished\.$/) {
+ $stmt->execute($ts, 'flight',"flight $1");
+ } elsif (m/^\[(\w+)[^][]+\] starting .* (\S+)$/) {
+ $stmt->execute($ts, 'flight', "step start $1 $2");
+ } elsif (m/^\[(\w+)[^][]+\] finished/) {
+ $stmt->execute($ts, 'flight', "step finish $1");
+ }
+ }
+ }
+ } elsif (m/^bisected\./) {
+ $mtime->('bisected');
+ } else {
+ warn $f;
+ }
+}
+
+$dbh->commit();
new file mode 100755
@@ -0,0 +1,17 @@
+#!/bin/bash
+set -e
+set -o pipefail
+set -o posix
+
+# usage: ./mg-bisect-activity-create-db
+# overwrites bisect-timing.db unconditionally
+
+rm -f bisect-timing.db
+sqlite3 bisect-timing.db -batch </dev/null -cmd '
+ CREATE TABLE events (
+ ts INTEGER NOT NULL,
+ cat TEXT NOT NULL,
+ evt TEXT NOT NULL
+ );
+ CREATE INDEX events_ts_ix ON events (ts);
+'
new file mode 100755
@@ -0,0 +1,181 @@
+#!/usr/bin/perl -w
+#
+# usage: ./mg-bisect-activity-collect-tmpdir PATH/TO/tmp
+
+use strict;
+use DBI;
+use Data::Dumper;
+use Graph::Directed;
+
+my ($begin,$end) = @ARGV;
+
+my $dbh = DBI->connect("dbi:SQLite:dbname=bisect-timing.db","","",
+ { RaiseError => 1, PrintError => 1, ReadOnly => 1 });
+
+$dbh->begin_work();
+
+my $exclude_cond = <<END;
+ (
+ (cat = 'email' AND evt = 'MAINREPORT')
+ OR (cat = 'unique')
+ )
+END
+
+my $time_cond = '1=1';
+$time_cond .= " AND ts >= $begin" if defined $begin;
+$time_cond .= " AND ts <= $end" if defined $end;
+$time_cond = "($time_cond)";
+
+my $tq = $dbh->prepare(<<END);
+ SELECT DISTINCT ts
+ FROM events
+ WHERE NOT $exclude_cond
+ AND $time_cond
+ ORDER BY ts ASC
+END
+
+my $eq = $dbh->prepare(<<END);
+ SELECT DISTINCT (cat || ' - ' || evt)
+ FROM events
+ WHERE ts = ?
+ AND NOT $exclude_cond
+ ORDER BY cat, evt
+END
+
+our %evts;
+our %thisorder;
+
+sub getevts ($) {
+ my ($ts) = @_;
+ $eq->execute($ts);
+ return map { $_->[0] } @{ $eq->fetchall_arrayref() };
+}
+
+sub evlk ($) { join ' | ', @{$_[0]} };
+
+$tq->execute();
+my $lastevts;
+while (my ($ts) = $tq->fetchrow_array()) {
+ my @evts = getevts($ts);
+
+ $evts{$_}++ foreach @evts;
+
+ if ($lastevts) {
+ if (@evts==1 && @$lastevts==1) {
+ my @k = (@$lastevts, @evts);
+ $thisorder{join "\t", @k}++;
+ }
+ }
+ $lastevts = \@evts;
+}
+
+sub forceorder ($$) {
+ my ($e,$f) = @_;
+ $thisorder{ join "\t", $e, $f }++;
+ delete $thisorder{ join "\t", $f, $e };
+}
+
+while (<DATA>) {
+ s/\s+$//;
+ next unless m/\S/;
+ last if m/^-/;
+ m/ \| / or die "$_ ?";
+ forceorder($`, $');
+}
+
+foreach (sort keys %evts) {
+ if (m/^flight - step start (\w+) /) {
+ forceorder($_, "flight - step finish $1");
+ }
+}
+
+our @squashes;
+
+while (<DATA>) {
+ s/\s+$//;
+ next unless m/\S/;
+ push @squashes, $_;
+}
+
+#print Dumper(\%thisorder);
+our %periods;
+
+$tq->execute();
+my $lastevt;
+my $lastts;
+while (my ($ts) = $tq->fetchrow_array()) {
+ my @evts = getevts($ts);
+ if (@evts > 1) {
+ my $g = new Graph::Directed;
+ foreach my $e (@evts) {
+ $g->add_vertex($e);
+ foreach my $f (@evts) {
+ next if $e eq $f;
+ next unless $thisorder{ join "\t", $e, $f };
+ $g->add_edge($e, $f);
+ }
+ }
+ if ($g->is_cyclic) {
+ my @cycle = $g->find_a_cycle;
+ die "$ts cyclic ".(join " | ", @cycle);
+ }
+ $g = $g->transitive_closure;
+ my $exp_edges = (@evts/2 * (@evts-1));
+ if ($g->edges != $exp_edges) {
+ foreach my $e (@evts) {
+ foreach my $f (@evts) {
+ next if $e eq $f;
+ next if $g->has_edge($e,$f);
+ next if $g->has_edge($f,$e);
+ die "$ts missing $e | $f";
+ }
+ }
+ die "$ts ".Dumper(\@evts, $g)." ?" if $@;
+ }
+ @evts = sort { $g->has_edge($a,$b) ? +1 : -1 } @evts;
+ }
+ foreach my $evt (@evts) {
+ $evt =~ s/^$_$/$_/ foreach (@squashes);
+ if (defined $lastevt) {
+ $periods{"$lastevt | $evt"} += ($ts - $lastts);
+ }
+ $lastevt = $evt;
+ $lastts = $ts;
+ }
+}
+
+my $total = 0;
+$total += $_ foreach values %periods;
+
+sub prperiod () {
+ printf "%5.2f%% %10d %s\n", 100.0*$periods{$_}/$total, $periods{$_}, $_;
+}
+
+prperiod() foreach sort keys %periods;
+
+print "\n";
+
+prperiod() foreach sort { $periods{$b} <=> $periods{$a} } keys %periods;
+
+$dbh->rollback();
+
+# Two lots of stuff:
+# - orderings of possibly-simultaenous events
+# (see README.bisect-wallclock-analysis)
+# - regexps for squashing periods; each entry in the list
+# replaces all matching periods with a copy of the regexp
+__DATA__
+email - tested | email - testing
+mtime - email.sent | email - testing
+mtime - bisection-summary | mtime - bisection-report
+flight - flight ended | mtime - transcript
+flight - step finish build | flight - job finish
+flight - flight start | flight - job start
+mtime - bisection-report | mtime - bisected
+email - unreproducible | crlog - end
+email - complete | crlog - end
+--
+flight - step start build host-(?:install|build-prep)
+flight - step start build .*-build
+flight - step start test capture-logs.*
+flight - step start test (?!capture|host-install|hosts-allocate).*
Signed-off-by: Ian Jackson <ian.jackson@eu.citrix.com> --- .gitignore | 2 + README.bisect-wallclock-analysis | 85 ++++++++++++++++++ mg-bisect-activity-collect-emails | 55 ++++++++++++ mg-bisect-activity-collect-tmpdir | 94 ++++++++++++++++++++ mg-bisect-activity-create-db | 17 ++++ mg-bisect-activity-dump-periods | 181 ++++++++++++++++++++++++++++++++++++++ 6 files changed, 434 insertions(+) create mode 100644 README.bisect-wallclock-analysis create mode 100755 mg-bisect-activity-collect-emails create mode 100755 mg-bisect-activity-collect-tmpdir create mode 100755 mg-bisect-activity-create-db create mode 100755 mg-bisect-activity-dump-periods