blob: a6afbe366ad3d63d4fcb906a3e28f8d68d7fe456 [file] [log] [blame]
florianb0f4ce82011-10-25 20:01:41 +00001#!/usr/bin/env perl
2
3#-----------------------------------------------------------------
4# Quick and dirty script to summarize build information for a
5# set of nightly runs.
6#
7# The results of the nighly regression runs are extracted from
8# the GMANE mail archive. The URL for a given mail sent to the
9# valgrind-developers mailing list is
10#
11# http://article.gmane.org/gmane.comp.debugging.valgrind.devel/<integer>
12#
13# The script extracts information about the regression run from a
14# block of information at the beginning of the mail. That information
15# was added beginning October 4, 2011. Therefore, only regression runs
16# from that date or later can be analyzed.
17#
18# There is unfortunately no good way of figuring out the interval
19# of integers in the above URL that include all nightly regression
20# runs.
21#
22# The function get_regtest_data does all the work. It returns a hash
23# whose keys are the dates at which nightly runs took place. The value
24# is in turn a hash.
25#
26# Each such hash has the following keys:
27# "builds" array of hashes
28# "num_builds" int
29# "num_failing_builds" int
30# "num_passing_builds" int
31# "num_testcase_failures" int
32# "num_failing_testcases" int
33# "failure_frequency" hash indexed by testcase name; value = int
34#
35# "builds" is an array of hashes with the following keys
36# "arch" string (architecture)
37# "distro" string (distribution, e.g. Fedora-15)
38# "failures" array of strings (failing testcases)
39# "valgrind revision" integer
40# "VEX revision" integer
41# "GCC version" string
42# "C library" string
43# "uname -mrs" string
44# "Vendor version" string
45#
46#-----------------------------------------------------------------
47use strict;
48use warnings;
49
50use LWP::Simple;
51use Getopt::Long;
52
53my $prog_name = "nightly-build-summary";
54
55my $debug = 0;
56my $keep = 0;
57
58my $usage=<<EOF;
59USAGE
60
61 $prog_name
62
63 --from=INTEGER beginning of mail interval; > 14800
64
65 [--to=INTEGER] end of mail interval; default = from + 100
66
67 [--debug] verbose mode (debugging)
68
69 [--keep] write individual emails to files (debugging)
70
71 [--dump] write results suitable for post-processing
72
73 [--readable] write results in human readable form (default)
74
75EOF
76
77
78#-----------------------------------------------------------------
79# Search for a line indicating that this is an email containing
80# the results of a valgrind regression run.
81# Return 1, if found and 0 oherwise.
82#-----------------------------------------------------------------
83sub is_regtest_result {
84 my (@lines) = @_;
85
86 foreach my $line (@lines) {
87 return 1 if ($line =~ "^valgrind revision:");
88 }
89
90 return 0;
91}
92
93
94#-----------------------------------------------------------------
95# Extract information from the run. Don't prep the data here. This
96# is done later on.
97#-----------------------------------------------------------------
98sub get_raw_data {
99 my (@lines, $msgno) = @_;
100 my ($i, $n, $line, $date);
101
102 $n = scalar @lines;
103
104 my %hash = ();
105
floriand8e32e22011-10-28 02:45:15 +0000106# 1) Locate the section with the info about the environment of this nightly run
floriand1e996a2013-09-10 14:00:45 +0000107 for ($i = 0; $i < $n; ++$i) {
florianb0f4ce82011-10-25 20:01:41 +0000108 last if ($lines[$i] =~ /^valgrind revision:/);
109 }
110 die "no info block in message $msgno" if ($i == $n);
111
floriand8e32e22011-10-28 02:45:15 +0000112# 2) Read the info about the build: compiler, valgrind revision etc.
florianb0f4ce82011-10-25 20:01:41 +0000113# and put it into a hash.
114 for ( ; $i < $n; ++$i) {
115 $line = $lines[$i];
116 last if ($line =~ /^$/); # empty line indicates end of section
117 my ($key, $value) = split(/:/, $line);
118 $value =~ s/^[ ]*//; # removing leading blanks
119 $hash{$key} = $value;
120 }
121
122 if ($debug) {
123 foreach my $key (keys %hash) {
124 my ($val) = $hash{$key};
125 print "regtest env: KEY = |$key| VAL = |$val|\n";
126 }
127 }
128
floriand8e32e22011-10-28 02:45:15 +0000129# 3) Get the date from when the build was kicked off.
130 for ( ; $i < $n; ++$i) {
131 $line = $lines[$i];
132
133 if ($line =~ /^Started at[ ]+([^ ]+)/) {
134 $date = $1;
135 print "DATE = $date\n";
136 last;
137 }
138 }
139 die "no date found in message $msgno" if ($i == $n);
140
141
florianb0f4ce82011-10-25 20:01:41 +0000142# 4) Find out if the regression run failed or passed
143 $hash{"failures"} = [];
144 for ($i = $i + 1; $i < $n; ++$i) {
145 $line = $lines[$i];
146 if ($line =~ /Running regression tests/) {
147 return %hash if ($line =~ /done$/); # regtest succeeded; no failures
148 die "cannot determine regtest outcome for message $msgno"
149 if (! ($line =~ /failed$/));
150 last;
151 }
152 }
153
154# 5) Regtest failed; locate the section with the list of failing testcases
155 for ($i = $i + 1; $i < $n; ++$i) {
156 $line = $lines[$i];
157# Match for end-of-line == because line might be split.
158 last if ($line =~ /==$/);
159 }
160 die "cannot locate failing testcases in message $msgno" if ($i == $n);
161
162# 6) Get list of failing testcases
163 for ($i = $i + 1; $i < $n; ++$i) {
164 $line = $lines[$i];
165
166 last if ($line =~ /^$/);
167
168 my ($testcase) = (split(/\s+/, $line))[0];
169 print "ADD failing testcase $testcase\n" if ($debug);
170 push @{$hash{"failures"}}, $testcase;
171 }
172
173 return ($date, %hash);
174}
175
176
177#-----------------------------------------------------------------
178# Extract architecture; get a pretty name for the distro
179#-----------------------------------------------------------------
180sub prep_regtest_data {
181 my (%hash) = @_;
182 my ($val, $arch, $distro);
183
184 $val = $hash{"uname -mrs"};
185 die "uname -mrs info is missing" if (! defined $val);
186 $arch = (split(/ /, $val))[2];
187
188 $val = $hash{"Vendor version"};
189 die "Vendor version info is missing" if (! defined $val);
190
191 if ($val =~ /Fedora release ([0-9]+)/) {
192 $distro = "Fedora-$1";
193 } elsif ($val =~ /openSUSE ([0-9]+)\.([0-9]+)/) {
194 $distro = "openSUSE-$1.$2";
195 } elsif ($val =~ /SUSE Linux Enterprise Server 11 SP1/) {
196 $distro = "SLES-11-SP1";
197 } elsif ($val =~ /Red Hat Enterprise Linux AS release 4/) {
198 $distro = "RHEL-4";
199 } else {
200 $distro = "UNKNOWN";
201 }
202
203# Add architecture and distribution to hash
204 $hash{"arch"} = $arch;
205 $hash{"distro"} = $distro;
206
207 return %hash;
208}
209
210
211#-----------------------------------------------------------------
212# Precompute some summary information and record it
213#-----------------------------------------------------------------
214sub precompute_summary_info
215{
216 my (%dates) = @_;
217
218 foreach my $date (sort keys %dates) {
219 my %failure_frequency = ();
220
221 my %nightly = %{ $dates{$date} };
222 my @builds = @{ $nightly{"builds"} };
223
224 $nightly{"num_builds"} = scalar (@builds);
225 $nightly{"num_failing_builds"} = 0;
226 $nightly{"num_testcase_failures"} = 0;
227
228 foreach my $build (@builds) {
229 my %regtest_data = %{ $build };
230
231 my @failures = @{ $regtest_data{"failures"} };
232 my $num_fail = scalar (@failures);
233
234 ++$nightly{"num_failing_builds"} if ($num_fail != 0);
235 $nightly{"num_testcase_failures"} += $num_fail;
236
237# Compute how often a testcase failed
238 foreach my $test ( @failures ) {
239 if (defined $failure_frequency{$test}) {
240 ++$failure_frequency{$test};
241 } else {
242 $failure_frequency{$test} = 1;
243 }
244 }
245 }
246
247 $nightly{"num_passing_builds"} =
248 $nightly{"num_builds"} - $nightly{"num_failing_builds"};
249
250 $nightly{"num_failing_testcases"} = scalar (keys %failure_frequency);
251
252 $nightly{"failure_frequency"} = { %failure_frequency };
253
254 $dates{$date} = { %nightly };
255 }
256
257 return %dates;
258}
259
260
261#-----------------------------------------------------------------
262# Get messages from GMANE, and build up a database of results.
263#-----------------------------------------------------------------
264sub get_regtest_data {
265 my ($from, $to) = @_;
266
267 my $url_base = "http://article.gmane.org/gmane.comp.debugging.valgrind.devel/";
268
269 my %dates = ();
270
271 my $old_date = "-1";
272 my @builds = ();
273
274 for (my $i = $from; $i <= $to; ++$i) {
275 my $url = "$url_base" . "$i";
276
277 my $page = get("$url");
278
279 if ($keep) {
280 open (EMAIL, ">$i");
281 print EMAIL $page;
282 close(EMAIL);
283 }
284
285# Detect if the article does not exist. Happens for too large --to= values
286 last if ($page eq "No such file.\n");
287
288# Split the page into lines
289 my @lines = split(/\n/, $page);
290
291# Check whether it contains a regression test result
292 next if (! is_regtest_result(@lines));
293 print "message $i is a regression test result\n" if ($debug);
294
295# Get the raw data
296 my ($date, %regtest_data) = get_raw_data(@lines);
297
298 %regtest_data = prep_regtest_data(%regtest_data);
299
300 if ($date ne $old_date) {
301 my %nightly = ();
302 $nightly{"builds"} = [ @builds ];
303 $dates{$old_date} = { %nightly } if ($old_date ne "-1");
304
305 $old_date = $date;
306 @builds = ();
307 }
308
309 push @builds, { %regtest_data };
310 }
311 my %nightly = ();
312 $nightly{"builds"} = [ @builds ];
313 $dates{$old_date} = { %nightly } if ($old_date ne "-1");
314
315# Convenience: precompute some info we'll be interested in
316 %dates = precompute_summary_info( %dates );
317
318 return %dates;
319}
320
321
322#-----------------------------------------------------------------
323# Write out the results in a form suitable for automatic post-processing
324#-----------------------------------------------------------------
325sub dump_results {
326 my (%dates) = @_;
327
328 foreach my $date (sort keys %dates) {
329
330 my %nightly = %{ $dates{$date} };
331 my @builds = @{ $nightly{"builds"} };
332
333 foreach my $build (@builds) {
334 my %regtest_data = %{ $build };
335
336 my $arch = $regtest_data{"arch"};
337 my $distro = $regtest_data{"distro"};
338 my @failures = @{ $regtest_data{"failures"} };
339 my $num_fail = scalar (@failures);
340 my $fails = join(":", sort @failures);
341
342 printf("Regrun: %s %3d %-10s %-20s %s\n",
343 $date, $num_fail, $arch, $distro, $fails);
344 }
345
346 my %failure_frequency = %{ $nightly{"failure_frequency"} };
347
348 foreach my $test (keys %failure_frequency) {
349 printf("Test: %s %3d %s\n",
350 $date, $failure_frequency{$test}, $test);
351 }
352
353 printf("Total: %s builds: %d %d fail %d pass tests: %d fail %d unique\n",
354 $date, $nightly{"num_builds"}, $nightly{"num_failing_builds"},
355 $nightly{"num_passing_builds"}, $nightly{"num_testcase_failures"},
356 $nightly{"num_failing_testcases"});
357 }
358}
359
360
361sub write_readable_results {
362 my (%dates) = @_;
363
364 foreach my $date (sort keys %dates) {
365 my %nightly = %{ $dates{$date} };
366
367 print "$date\n----------\n";
368
369 printf("%3d builds\n", $nightly{"num_builds"});
370 printf("%3d builds fail\n", $nightly{"num_failing_builds"});
371 printf("%3d builds pass\n", $nightly{"num_passing_builds"});
372 print "\n";
373 printf("%3d testcase failures (across all runs)\n",
374 $nightly{"num_testcase_failures"});
375 printf("%3d failing testcases (unique)\n",
376 $nightly{"num_failing_testcases"});
377 print "\n";
378
379 my @builds = @{ $nightly{"builds"} };
380
381 if ($nightly{"num_passing_builds"} != 0) {
382 print "Passing builds\n";
383 print "--------------\n";
384 foreach my $build (@builds) {
385 my %regtest_data = %{ $build };
386 my @failures = @{ $regtest_data{"failures"} };
387 my $num_fail = scalar (@failures);
388
389 if ($num_fail == 0) {
390 my $arch = $regtest_data{"arch"};
391 my $distro = $regtest_data{"distro"};
392
393 printf("%-8s %-15s\n", $arch, $distro);
394 }
395 print "\n";
396 }
397 print "\n";
398 }
399
400 if ($nightly{"num_failing_builds"} != 0) {
401 print "Failing builds\n";
402 print "--------------\n";
403 foreach my $build (@builds) {
404 my %regtest_data = %{ $build };
405 my @failures = @{ $regtest_data{"failures"} };
406 my $num_fail = scalar (@failures);
407
408 if ($num_fail != 0) {
409 my $arch = $regtest_data{"arch"};
410 my $distro = $regtest_data{"distro"};
411
412 printf("%-8s %-15s %d failures\n", $arch, $distro, $num_fail);
413 foreach my $test (@failures) {
414 print " $test\n";
415 }
416 print "\n";
417 }
418 }
419 print "\n";
420 }
421
422 print "Failing testcases and their frequency\n";
423 print "-------------------------------------\n";
424 my %failure_frequency = %{ $nightly{"failure_frequency"} };
425
426# Sorted in decreasing frequency
427 foreach my $test (sort {$failure_frequency{$b} cmp $failure_frequency{$a} }
428 keys %failure_frequency) {
429 printf("%3d %s\n", $failure_frequency{$test}, $test);
430 }
431 print "\n";
432 }
433}
434
435
436sub main
437{
438 my ($from, $to, $dump, $readable);
439
440 $from = $to = 0;
441 $dump = $readable = 0;
442
443 GetOptions( "from=i" => \$from,
444 "to=i" => \$to,
445 "debug" => \$debug,
446 "dump" => \$dump,
447 "keep" => \$keep,
448 "readable" => \$readable
449 ) || die $usage;
450
451# 14800 is about Oct 4, 2011 which is when we began including information
452# about the environment
453
454 die $usage if ($from < 14800);
455
456 $to = $from + 100 if ($to == 0);
457
458 if ($from > $to) {
459 print STDERR "*** invalid [from,to] interval. Try again\n";
460 die $usage;
461 }
462
463 $readable = 1 if ($dump == 0 && $readable == 0);
464
465 print "check message interval [$from...$to]\n" if ($debug);
466
467# Get mails from GMANE mail archive
468
469 my %dates = get_regtest_data($from, $to);
470
471 dump_results(%dates) if ($dump);
472
473 write_readable_results(%dates) if ($readable);
474}
475
476main();
477
478exit 0;