blob: a58b3c697bf2f105bbc70cd564bf6909b7d288ef [file] [log] [blame]
commit-bot@chromium.org517c1e22014-01-22 22:57:19 +00001#!/usr/bin/python2
2
3# Copyright 2014 Google Inc.
4#
5# Use of this source code is governed by a BSD-style license that can be
6# found in the LICENSE file.
7
8"""Skia's Chromium Codereview Comparison Script.
9
10This script takes two Codereview URLs, looks at the trybot results for
11the two codereviews and compares the results.
12
13Usage:
14 compare_codereview.py CONTROL_URL ROLL_URL
15"""
16
17import collections
18import os
19import re
20import sys
21import urllib2
22import HTMLParser
23
24
class CodeReviewHTMLParser(HTMLParser.HTMLParser):
    """Parses a CodeReview web page and collects trybot statuses.

    Use the CodeReviewHTMLParser.parse static function to make use of
    this class.

    This uses the HTMLParser class because it's the best thing in
    Python's standard library.  We need a little more power than a
    regex.  [Search for "You can't parse [X]HTML with regex." for more
    information.]

    The parser looks for <a class="build-result" status="..." href="...">
    links that appear after a <div id="tryjobdiv*"> has been opened, and
    records the anchor text as the bot name.
    """
    # pylint: disable=I0011,R0904

    # namedtuples are like lightweight structs in Python.  The low
    # overhead of a tuple, but the ease of use of an object.
    Status = collections.namedtuple('Status', ['status', 'url'])

    @staticmethod
    def parse(url):
        """Fetches and parses a CodeReview web page.

        Args:
            url (string), a codereview URL like this:
                'https://codereview.chromium.org/?????????'.

        Returns:
            A dictionary; the keys are bot_name strings, the values
            are CodeReviewHTMLParser.Status objects.  Returns None
            (after writing a message to stderr) if the URL could not
            be fetched.
        """
        parser = CodeReviewHTMLParser()
        try:
            response = urllib2.urlopen(url)
            try:
                parser.feed(response.read())
            finally:
                # Always release the connection, even if feed() raises.
                response.close()
        except (urllib2.URLError,):
            sys.stderr.write('Error getting %s\n' % url)
            return None
        parser.close()
        return parser.statuses

    def __init__(self):
        # Explicit base-class call: Python 2's HTMLParser is an
        # old-style class, so super() is not usable here.
        HTMLParser.HTMLParser.__init__(self)
        self._id = None
        self._status = None
        self._href = None
        self._anchor_data = ''
        self._currently_parsing_trybotdiv = False
        # statuses is a dictionary of CodeReviewHTMLParser.Status
        self.statuses = {}

    def handle_starttag(self, tag, attrs):
        """Overrides the HTMLParser method; called for each start tag.

        Args:
            tag: (string) lower-cased tag name.
            attrs: list of (name, value) pairs for the tag's
                attributes.  A value is None when the attribute is
                written without '=' (e.g. <div id>).
        """
        attrs = dict(attrs)
        if tag == 'div':
            # We are looking for <div id="tryjobdiv*">.  The value may
            # be None for a bare attribute, hence the "or ''".
            id_attr = attrs.get('id') or ''
            if id_attr.startswith('tryjobdiv'):
                self._id = id_attr
            return
        if tag != 'a' or not self._id:
            return
        if 'build-result' not in (attrs.get('class') or '').split():
            return
        # We have already seen a <div id="tryjobdiv*"> and this is a
        # link of the form <a class="build-result" href="*">.  Save
        # the (non-standard) status attribute and the URL, and start
        # saving anchor data from a clean slate.
        self._status = attrs.get('status')
        self._href = attrs.get('href')
        self._anchor_data = ''
        self._currently_parsing_trybotdiv = True

    def handle_data(self, data):
        """Overrides the HTMLParser method; called for text nodes.

        Args:
            data: (string) a run of text from the document.
        """
        # Save the text inside the <a></a> tags.  Assume <a> tags
        # aren't nested.
        if self._currently_parsing_trybotdiv:
            self._anchor_data += data

    def handle_endtag(self, tag):
        """Overrides the HTMLParser method; called for each end tag.

        Args:
            tag: (string) lower-cased tag name.
        """
        if tag != 'a' or not self._currently_parsing_trybotdiv:
            return
        # The accumulated anchor text is the bot name.  Only record
        # the entry when both a name and a status are present.
        bot = self._anchor_data.strip()
        if bot and self._status:
            self.statuses[bot] = CodeReviewHTMLParser.Status(
                status=self._status, url=self._href)
        # Reset state to search for the next bot.  This must happen
        # even when the anchor had no status attribute; otherwise
        # text following the anchor would keep accumulating and be
        # glued onto the next bot's name.
        self._currently_parsing_trybotdiv = False
        self._anchor_data = ''
        self._status = None
        self._href = None
commit-bot@chromium.org517c1e22014-01-22 22:57:19 +0000145
146
class BuilderHTMLParser(HTMLParser.HTMLParser):
    """Parses Trybot web pages and collects failure descriptions.

    Use the BuilderHTMLParser.parse static function to make use of
    this class.

    This uses the HTMLParser class because it's the best thing in
    Python's standard library.  We need a little more power than a
    regex.  [Search for "You can't parse [X]HTML with regex." for more
    information.]

    For every outermost <li> element that contains a
    <div class="failure result">, the text that follows that div (up
    to the closing outermost </li>) is cleaned up and stored as one
    BuilderHTMLParser.Result.
    """
    # pylint: disable=I0011,R0904

    # text: cleaned-up failure description; url: href of the last
    # '/logs/stdio' link seen inside the failure, or '' if none.
    Result = collections.namedtuple('Result', ['text', 'url'])

    @staticmethod
    def parse(url):
        """Fetches and parses a Trybot web page.

        Args:
            url (string), a trybot result URL.

        Returns:
            An array of BuilderHTMLParser.Results, each a description
            of failure results, along with an optional url.  Returns
            an empty list (after writing a message to stderr) if the
            URL could not be fetched.
        """
        parser = BuilderHTMLParser()
        try:
            response = urllib2.urlopen(url)
            try:
                parser.feed(response.read())
            finally:
                # Always release the connection, even if feed() raises.
                response.close()
        except (urllib2.URLError,):
            sys.stderr.write('Error getting %s\n' % url)
            return []
        parser.close()
        return parser.failure_results

    def __init__(self):
        # Explicit base-class call: Python 2's HTMLParser is an
        # old-style class, so super() is not usable here.
        HTMLParser.HTMLParser.__init__(self)
        self.failure_results = []
        # The next two attributes are not read by this class; they
        # are kept so the instance layout is unchanged.
        self._current_failure_result = None
        self._divlevel = None
        self._li_level = 0
        self._li_data = ''
        self._current_failure = False
        self._failure_results_url = ''

    def handle_starttag(self, tag, attrs):
        """Overrides the HTMLParser method; called for each start tag.

        Args:
            tag: (string) lower-cased tag name.
            attrs: list of (name, value) pairs for the tag's
                attributes.  A value is None when the attribute is
                written without '='.
        """
        attrs = dict(attrs)
        if tag == 'li':
            # <li> tags can be nested.  So we have to count the
            # nest-level for backing out.
            self._li_level += 1
            return
        if tag == 'div' and attrs.get('class') == 'failure result':
            # We care about this sort of thing:
            #   <li>
            #     <li>
            #       <li>
            #         <div class="failure result">...</div>
            #       </li>
            #     </li>
            #     We want this text here.
            #   </li>
            if self._li_level > 0:
                self._current_failure = True  # Tells us to keep text.
            return
        if tag == 'a' and self._current_failure:
            # Anchors without an href (e.g. <a name="...">) are legal,
            # so treat a missing or valueless href as ''.
            href = attrs.get('href') or ''
            # Sometimes we want to keep the stdio url.  We always
            # return it, just in case.
            if href.endswith('/logs/stdio'):
                self._failure_results_url = href

    def handle_data(self, data):
        """Overrides the HTMLParser method; called for text nodes.

        Args:
            data: (string) a run of text from the document.
        """
        if self._current_failure:
            self._li_data += data

    def handle_endtag(self, tag):
        """Overrides the HTMLParser method; called for each end tag.

        When the outermost </li> is reached and a failure div was seen
        inside it, the accumulated text is cleaned up and appended to
        self.failure_results.

        Args:
            tag: (string) lower-cased tag name.
        """
        if tag != 'li':
            return
        # Clamp at zero so a stray </li> cannot push the counter
        # negative and hide every failure that follows it.
        self._li_level = max(0, self._li_level - 1)
        if self._li_level != 0:
            return
        if self._current_failure:
            result = self._li_data.strip()
            words = result.split()
            if words:
                first = words[0]
                escaped = re.escape(first)
                # Sometimes, it repeats the same thing multiple
                # times; collapse a leading run of the first word.
                # The word is escaped because it is page text, not a
                # pattern, and (?!\S) keeps the run from swallowing
                # the start of a longer word.
                result = re.sub(
                    r'^%s(?:\s+%s)+(?!\S)' % (escaped, escaped),
                    lambda _match: first, result)
            # Remove some extra unnecessary text.
            result = re.sub(r'unexpected flaky.*', '', result)
            result = re.sub(r'\bpreamble\b', '', result)
            result = re.sub(r'\bstdio\b', '', result)
            self.failure_results.append(
                BuilderHTMLParser.Result(result, self._failure_results_url))
            self._current_failure_result = None
        # Reset the state.
        self._current_failure = False
        self._li_data = ''
        self._failure_results_url = ''
commit-bot@chromium.org517c1e22014-01-22 22:57:19 +0000281
282
def printer(indent, string):
    """Write indented, word-wrapped text to standard output.

    Each newline-separated line of `string` is wrapped independently.
    Every emitted line is prefixed with `indent` spacer characters;
    continuation lines produced by wrapping get one extra spacer.
    Lines containing no words produce no output.

    Args:
        indent: (int) indentation level.
        string: (string) text to print; may contain newlines.
    """
    pad = ' '
    # NOTE(review): the width budget subtracts two columns per indent
    # level while each level is padded with a one-character spacer --
    # confirm which of the two is intended.
    width = 68 - (2 * indent)

    def pack_words(words, limit):
        """Greedily join words into lines of at most `limit` columns.

        A single word longer than `limit` is kept whole on its own
        line.  Returns a list of strings.
        """
        packed = []
        current = ''
        for word in words:
            if not current:
                current = word
            elif len(current) + 1 + len(word) > limit:
                packed.append(current)
                current = word
            else:
                current = current + ' ' + word
        if current:
            packed.append(current)
        return packed

    stream = sys.stdout
    for raw_line in string.split('\n'):
        pieces = pack_words(raw_line.split(), width)
        for index, piece in enumerate(pieces):
            # First piece gets the base indent; wrapped pieces get
            # one more spacer so continuations are visually offset.
            prefix = pad * indent
            if index > 0:
                prefix += pad
            stream.write(prefix)
            stream.write(piece)
            stream.write('\n')
    stream.flush()
commit-bot@chromium.org517c1e22014-01-22 22:57:19 +0000314
315
def _format_failure_lines(builder_url):
    """Fetch a failed builder page and format its failures for printing.

    Args:
        builder_url: (string) URL of the trybot result page.

    Returns:
        A list of (indent, text) tuples suitable for printer().
    """
    lines = []
    for result in BuilderHTMLParser.parse(builder_url):
        # Put each *.html test name on its own line, prefixed by '__'.
        text = re.sub(r'(\S*\.html) ', '\n__\\g<1>\n', result.text)
        # Strip runtimes.
        text = re.sub(r'\(.*\)', '', text)
        lines.append((2, text))
        if 'compile' in result.text or '...and more' in result.text:
            # For these failures the text alone is not enough, so add
            # the log URL, resolved against the builder page's
            # directory.
            lines.append(
                (3, re.sub('/[^/]*$', '/', builder_url) + result.url))
    return lines


def main(control_url, roll_url, verbosity=1):
    """Compare two Codereview URLs

    Prints, for every trybot common to both codereviews, how the roll
    differs from the control, followed by an optional summary table.

    Args:
        control_url, roll_url: (strings) URL of the format
            https://codereview.chromium.org/?????????

        verbosity: (int) verbose level.  0, 1, or 2.

    Returns:
        None.  Returns early (after an error message on stderr) when
        either page cannot be fetched or when the two codereviews
        share no trybots.
    """
    # pylint: disable=I0011,R0914,R0912
    control = CodeReviewHTMLParser.parse(control_url)
    roll = CodeReviewHTMLParser.parse(roll_url)
    if control is None or roll is None:
        # parse() returns None when a page could not be fetched and
        # has already reported which URL failed; set(None) below
        # would raise TypeError.
        return
    all_bots = set(control) & set(roll)  # Set intersection.
    if not all_bots:
        sys.stderr.write(
            'Error: control %s and roll %s have no common trybots.\n'
            % (list(control), list(roll)))
        return

    control_name = '[control %s]' % control_url.split('/')[-1]
    roll_name = '[roll %s]' % roll_url.split('/')[-1]

    out = sys.stdout

    for bot in sorted(all_bots):
        control_status = control[bot].status
        roll_status = roll[bot].status

        if roll_status == 'success':
            if verbosity > 1:
                printer(0, '==%s==' % bot)
                printer(1, 'OK')
            continue

        if control_status != 'failure' and roll_status != 'failure':
            continue
        printer(0, '==%s==' % bot)

        sides = [
            (control_status, control_name, control[bot].url),
            (roll_status, roll_name, roll[bot].url),
        ]
        formatted_results = []
        for (status, _, url) in sides:
            if status == 'failure':
                formatted_results.append(_format_failure_lines(url))
            else:
                formatted_results.append([])

        # "Identical" only makes sense when both sides failed.  A
        # non-failed side has an empty list, which would otherwise
        # compare equal to a failed side with no parsed results and
        # hide that failure.
        identical = (control_status == 'failure'
                     and roll_status == 'failure'
                     and formatted_results[0] == formatted_results[1])

        if identical:
            printer(1, control_name + ' and ' + roll_name
                    + ' failed identically')
            for (indent, line) in formatted_results[0]:
                printer(indent, line)
        else:
            for (lines, (status, name, _)) in zip(formatted_results, sides):
                printer(1, name)
                if status == 'failure':
                    for (indent, line) in lines:
                        printer(indent, line)
                else:
                    printer(2, status)
        out.write('\n')

    if verbosity > 0:
        # Print out summary of all of the bots.
        out.write('%11s %11s %4s %s\n\n' %
                  ('CONTROL', 'ROLL', 'DIFF', 'BOT'))
        for bot in sorted(all_bots):
            control_status = control[bot].status
            roll_status = roll[bot].status
            if roll_status == 'success':
                diff = ''
            elif control_status == 'success' and roll_status == 'failure':
                diff = '!!!!'  # Regression: control passed, roll failed.
            elif 'pending' in control_status or 'pending' in roll_status:
                diff = '....'  # Not finished yet.
            else:
                diff = '****'
            out.write('%11s %11s %4s %s\n' % (
                control_status, roll_status, diff, bot))
        out.write('\n')
    out.flush()
commit-bot@chromium.org517c1e22014-01-22 22:57:19 +0000407
if __name__ == '__main__':
    # Script entry point: expects CONTROL_URL and ROLL_URL arguments.
    if len(sys.argv) < 3:
        # Show the module docstring as the usage message.
        sys.stderr.write('%s\n' % __doc__)
        # sys.exit, not the bare exit(): the latter is injected by
        # the site module and is not guaranteed to exist.
        sys.exit(1)
    try:
        # Verbosity comes from the environment; 1 when unset.
        _verbosity = int(os.environ.get('COMPARE_CODEREVIEW_VERBOSITY', 1))
    except ValueError:
        sys.stderr.write(
            'COMPARE_CODEREVIEW_VERBOSITY must be an integer.\n')
        sys.exit(1)
    main(sys.argv[1], sys.argv[2], _verbosity)
commit-bot@chromium.org517c1e22014-01-22 22:57:19 +0000414