blob: d3c8790578e704459f88df6239319dc6952e8f95 [file] [log] [blame]
epoger@google.comf9d134d2013-09-27 15:02:44 +00001#!/usr/bin/python
2
epoger@google.com9fb6c8a2013-10-09 18:05:58 +00003"""
epoger@google.comf9d134d2013-09-27 15:02:44 +00004Copyright 2013 Google Inc.
5
6Use of this source code is governed by a BSD-style license that can be
7found in the LICENSE file.
epoger@google.comf9d134d2013-09-27 15:02:44 +00008
epoger@google.comf9d134d2013-09-27 15:02:44 +00009Repackage expected/actual GM results as needed by our HTML rebaseline viewer.
epoger@google.com9fb6c8a2013-10-09 18:05:58 +000010"""
epoger@google.comf9d134d2013-09-27 15:02:44 +000011
12# System-level imports
13import fnmatch
14import json
epoger@google.comdcb4e652013-10-11 18:45:33 +000015import logging
epoger@google.comf9d134d2013-09-27 15:02:44 +000016import os
17import re
18import sys
epoger@google.com542b65f2013-10-15 20:10:33 +000019import time
epoger@google.comf9d134d2013-09-27 15:02:44 +000020
# Imports from within Skia
#
# gm_json.py, which lets us parse the actual-results.json file written out by
# the GM tool, lives in the 'gm' directory, so that directory must be
# importable.  It is appended at the *end* of sys.path so that any dirs that
# are already in the PYTHONPATH will be preferred.
GM_DIRECTORY = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
if GM_DIRECTORY not in sys.path:
    sys.path.append(GM_DIRECTORY)
import gm_json

# Splits an image filename into its (testname, config) parts.
IMAGE_FILENAME_RE = re.compile(gm_json.IMAGE_FILENAME_PATTERN)
# Builds an image filename; pass in (testname, config).
IMAGE_FILENAME_FORMATTER = '%s_%s.png'

# Keys of each per-test result dict whose values get counted in the
# 'categories' summary.
CATEGORIES_TO_SUMMARIZE = ['builder', 'test', 'config', 'resultType']

# Valid arguments to Results.get_results_of_type().
RESULTS_ALL = 'all'
RESULTS_FAILURES = 'failures'
epoger@google.comf9d134d2013-09-27 15:02:44 +000041
class Results(object):
    """Loads actual and expected results from all builders, supplying combined
    reports as requested.

    Once this object has been constructed, the results (in self._results[])
    are immutable.  If you want to update the results based on updated JSON
    file contents, you will need to create a new Results object."""

    def __init__(self, actuals_root, expected_root):
        """
        Args:
          actuals_root: root directory containing all actual-results.json files
          expected_root: root directory containing all expected-results.json
              files

        Raises:
          IOError if either root does not refer to an existing directory
        """
        self._actuals_root = actuals_root
        self._expected_root = expected_root
        self._load_actual_and_expected()
        # Recorded after the results have been loaded.
        self._timestamp = int(time.time())

    def get_timestamp(self):
        """Return the time at which this object was created, in seconds past
        epoch (UTC).
        """
        return self._timestamp

    def edit_expectations(self, modifications):
        """Edit the expectations stored within this object and write them back
        to disk.

        Note that this will NOT update the results stored in self._results[] ;
        in order to see those updates, you must instantiate a new Results
        object based on the (now updated) files on disk.

        Args:
          modifications: a list of dictionaries, one for each expectation to
              update:

             [
               {
                 'builder': 'Test-Mac10.6-MacMini4.1-GeForce320M-x86-Debug',
                 'test': 'bigmatrix',
                 'config': '8888',
                 'expectedHashType': 'bitmap-64bitMD5',
                 'expectedHashDigest': '10894408024079689926',
               },
               ...
             ]

        Raises:
          IOError if the expectations root is not an existing directory
          KeyError if a modification names a builder for which no expectations
              file was found, or if the set of builders written back to disk
              differs from the set that was read

        TODO(epoger): For now, this does not allow the caller to set any fields
        other than expectedHashType/expectedHashDigest, and assumes that
        ignore-failure should be set to False.  We need to add support
        for other fields (notes, bugs, etc.) and ignore-failure=True.
        """
        expected_builder_dicts = Results._read_dicts_from_root(
            self._expected_root)
        for mod in modifications:
            image_name = IMAGE_FILENAME_FORMATTER % (
                mod['test'], mod['config'])
            # TODO(epoger): assumes a single allowed digest per test
            allowed_digests = [[mod['expectedHashType'],
                                int(mod['expectedHashDigest'])]]
            new_expectations = {
                gm_json.JSONKEY_EXPECTEDRESULTS_ALLOWEDDIGESTS:
                    allowed_digests,
                gm_json.JSONKEY_EXPECTEDRESULTS_IGNOREFAILURE: False,
            }
            builder_dict = expected_builder_dicts[mod['builder']]
            builder_expectations = builder_dict.get(
                gm_json.JSONKEY_EXPECTEDRESULTS)
            if not builder_expectations:
                # Missing or empty expectations section: start a fresh one and
                # attach it, so the new entry is written out with the builder.
                builder_expectations = {}
                builder_dict[gm_json.JSONKEY_EXPECTEDRESULTS] = (
                    builder_expectations)
            builder_expectations[image_name] = new_expectations
        Results._write_dicts_to_root(
            expected_builder_dicts, self._expected_root)

    def get_results_of_type(self, type):
        """Return results of some/all tests (depending on 'type' parameter).

        Args:
          type: string describing which types of results to include; must be
              one of the RESULTS_* constants.  (The name shadows the builtin,
              but is kept because callers may pass it by keyword.)

        Raises:
          KeyError if 'type' is not one of the RESULTS_* constants

        Results are returned as a dictionary in this form:

           {
             'categories': # dictionary of categories listed in
                           # CATEGORIES_TO_SUMMARIZE, with the number of times
                           # each value appears within its category
             {
               'resultType': # category name
               {
                 'failed': 29, # category value and total number found of
                               # that value
                 'failure-ignored': 948,
                 'no-comparison': 4502,
                 'succeeded': 38609,
               },
               'builder':
               {
                 'Test-Mac10.6-MacMini4.1-GeForce320M-x86-Debug': 1286,
                 'Test-Mac10.6-MacMini4.1-GeForce320M-x86-Release': 1134,
                 ...
               },
               ... # other categories from CATEGORIES_TO_SUMMARIZE
             }, # end of 'categories' dictionary

             'testData': # list of test results, with a dictionary for each
             [
               {
                 'builder': 'Test-Mac10.6-MacMini4.1-GeForce320M-x86-Debug',
                 'test': 'bigmatrix',
                 'config': '8888',
                 'resultType': 'failed',
                 'expectedHashType': 'bitmap-64bitMD5',
                 'expectedHashDigest': '10894408024079689926',
                 'actualHashType': 'bitmap-64bitMD5',
                 'actualHashDigest': '2409857384569',
               },
               ...
             ], # end of 'testData' list
           }
        """
        return self._results[type]

    @staticmethod
    def _read_dicts_from_root(root, pattern='*.json'):
        """Read all JSON dictionaries within a directory tree.

        Args:
          root: path to root of directory tree
          pattern: which files to read within root (fnmatch-style pattern)

        Returns:
          A meta-dictionary containing all the JSON dictionaries found within
          the directory tree, keyed by the builder name of each dictionary
          (the basename of the directory holding the file).  If one directory
          holds several matching files, the last one read wins.

        Raises:
          IOError if root does not refer to an existing directory
        """
        if not os.path.isdir(root):
            raise IOError('no directory found at path %s' % root)
        meta_dict = {}
        for dirpath, _dirnames, filenames in os.walk(root):
            for matching_filename in fnmatch.filter(filenames, pattern):
                builder = os.path.basename(dirpath)
                # If we are reading from the collection of actual results,
                # skip over the Trybot results (we don't maintain baselines
                # for them).
                if builder.endswith('-Trybot'):
                    continue
                fullpath = os.path.join(dirpath, matching_filename)
                meta_dict[builder] = gm_json.LoadFromFile(fullpath)
        return meta_dict

    @staticmethod
    def _write_dicts_to_root(meta_dict, root, pattern='*.json'):
        """Write all per-builder dictionaries within meta_dict to files under
        the root path.

        Security note: this will only write to files that already exist within
        the root path (as found by os.walk() within root), so we don't need to
        worry about malformed content writing to disk outside of root.
        However, the data written to those files is not double-checked, so it
        could contain poisonous data.

        Args:
          meta_dict: a builder-keyed meta-dictionary containing all the JSON
              dictionaries we want to write out
          root: path to root of directory tree within which to write files
          pattern: which files to write within root (fnmatch-style pattern)

        Raises:
          IOError if root does not refer to an existing directory
          KeyError if the set of per-builder dictionaries written out was
              different than expected.  This check runs after all writes have
              been attempted, so files may already have been modified by the
              time it is raised.
        """
        if not os.path.isdir(root):
            raise IOError('no directory found at path %s' % root)
        actual_builders_written = []
        for dirpath, _dirnames, filenames in os.walk(root):
            for matching_filename in fnmatch.filter(filenames, pattern):
                builder = os.path.basename(dirpath)
                # We should never encounter Trybot *expectations*, but if we
                # are writing into the actual-results dir, skip the Trybot
                # actuals.  (I don't know why we would ever write into the
                # actual-results dir, though.)
                if builder.endswith('-Trybot'):
                    continue
                per_builder_dict = meta_dict.get(builder)
                # Builders that are absent from meta_dict (or whose dict is
                # empty) are left untouched on disk.
                if per_builder_dict:
                    fullpath = os.path.join(dirpath, matching_filename)
                    gm_json.WriteToFile(per_builder_dict, fullpath)
                    actual_builders_written.append(builder)

        # Check: did we write out the set of per-builder dictionaries we
        # expected to?
        expected_builders_written = sorted(meta_dict.keys())
        actual_builders_written.sort()
        if expected_builders_written != actual_builders_written:
            raise KeyError(
                'expected to write dicts for builders %s, but actually wrote '
                'them for builders %s' % (
                    expected_builders_written, actual_builders_written))

    def _load_actual_and_expected(self):
        """Loads the results of all tests, across all builders (based on the
        files within self._actuals_root and self._expected_root),
        and stores them in self._results.

        Raises:
          IOError if either root does not refer to an existing directory
        """
        actual_builder_dicts = Results._read_dicts_from_root(
            self._actuals_root)
        expected_builder_dicts = Results._read_dicts_from_root(
            self._expected_root)

        # Pre-seed the resultType counts so that every result type shows up in
        # the summary (with a count of 0) even if no test produced it.
        categories_all = {}
        categories_failures = {}
        Results._ensure_included_in_category_dict(
            categories_all, 'resultType', [
                gm_json.JSONKEY_ACTUALRESULTS_FAILED,
                gm_json.JSONKEY_ACTUALRESULTS_FAILUREIGNORED,
                gm_json.JSONKEY_ACTUALRESULTS_NOCOMPARISON,
                gm_json.JSONKEY_ACTUALRESULTS_SUCCEEDED,
            ])
        Results._ensure_included_in_category_dict(
            categories_failures, 'resultType', [
                gm_json.JSONKEY_ACTUALRESULTS_FAILED,
                gm_json.JSONKEY_ACTUALRESULTS_FAILUREIGNORED,
                gm_json.JSONKEY_ACTUALRESULTS_NOCOMPARISON,
            ])

        data_all = []
        data_failures = []
        for builder in sorted(actual_builder_dicts):
            actual_results_for_this_builder = (
                actual_builder_dicts[builder][gm_json.JSONKEY_ACTUALRESULTS])
            for result_type in sorted(actual_results_for_this_builder):
                results_of_this_type = (
                    actual_results_for_this_builder[result_type])
                if not results_of_this_type:
                    continue
                for image_name in sorted(results_of_this_type):
                    actual_image = results_of_this_type[image_name]
                    try:
                        # TODO(epoger): assumes a single allowed digest per
                        # test
                        expected_image = (
                            expected_builder_dicts
                            [builder][gm_json.JSONKEY_EXPECTEDRESULTS]
                            [image_name]
                            [gm_json.JSONKEY_EXPECTEDRESULTS_ALLOWEDDIGESTS]
                            [0])
                    except (KeyError, TypeError, IndexError):
                        # There are several cases in which we would expect to
                        # find no expectations for a given test:
                        #
                        # 1. result_type == NOCOMPARISON
                        #   There are no expectations for this test yet!
                        #
                        # 2. ignore-tests.txt
                        #   If a test has been listed in ignore-tests.txt,
                        #   then its status may show as FAILUREIGNORED even if
                        #   it doesn't have any expectations yet.
                        #
                        # 3. alternate rendering mode failures (e.g.
                        #   serialized)
                        #   In cases like
                        #   https://code.google.com/p/skia/issues/detail?id=1684
                        #   ('tileimagefilter GM test failing in serialized
                        #   render mode'), the gm-actuals will list a failure
                        #   for the alternate rendering mode even though we
                        #   don't have explicit expectations for the test (the
                        #   implicit expectation is that it must render the
                        #   same in all rendering modes).
                        #
                        # IndexError is caught too, so that an expectations
                        # entry whose allowed-digests list is empty is treated
                        # as "no expectations" rather than aborting the load.
                        #
                        # Don't log types 1 or 2, because they are common.
                        # Log other types, because they are rare and we should
                        # know about them, but don't throw an exception,
                        # because we need to keep our tools working in the
                        # meanwhile!
                        if result_type not in [
                                gm_json.JSONKEY_ACTUALRESULTS_NOCOMPARISON,
                                gm_json.JSONKEY_ACTUALRESULTS_FAILUREIGNORED]:
                            logging.warning(
                                'No expectations found for test: %s', {
                                    'builder': builder,
                                    'image_name': image_name,
                                    'result_type': result_type,
                                })
                        expected_image = [None, None]

                    # If this test was recently rebaselined, it will remain in
                    # the 'failed' set of actuals until all the bots have
                    # cycled (although the expectations have indeed been set
                    # from the most recent actuals).  Treat these as successes
                    # instead of failures.
                    #
                    # TODO(epoger): Do we need to do something similar in
                    # other cases, such as when we have recently marked a test
                    # as ignoreFailure but it still shows up in the 'failed'
                    # category?  Maybe we should not rely on the result_type
                    # categories recorded within the gm_actuals AT ALL, and
                    # instead evaluate the result_type ourselves based on what
                    # we see in expectations vs actual checksum?
                    if expected_image == actual_image:
                        updated_result_type = (
                            gm_json.JSONKEY_ACTUALRESULTS_SUCCEEDED)
                    else:
                        updated_result_type = result_type

                    (test, config) = IMAGE_FILENAME_RE.match(
                        image_name).groups()
                    results_for_this_test = {
                        'builder': builder,
                        'test': test,
                        'config': config,
                        'resultType': updated_result_type,
                        'actualHashType': actual_image[0],
                        'actualHashDigest': str(actual_image[1]),
                        'expectedHashType': expected_image[0],
                        'expectedHashDigest': str(expected_image[1]),
                    }
                    Results._add_to_category_dict(
                        categories_all, results_for_this_test)
                    data_all.append(results_for_this_test)
                    if (updated_result_type !=
                            gm_json.JSONKEY_ACTUALRESULTS_SUCCEEDED):
                        Results._add_to_category_dict(
                            categories_failures, results_for_this_test)
                        data_failures.append(results_for_this_test)

        self._results = {
            RESULTS_ALL:
                {'categories': categories_all, 'testData': data_all},
            RESULTS_FAILURES:
                {'categories': categories_failures,
                 'testData': data_failures},
        }

    @staticmethod
    def _add_to_category_dict(category_dict, test_results):
        """Add test_results to the category dictionary we are building.
        (See documentation of self.get_results_of_type() for the format of
        this dictionary.)

        Args:
          category_dict: category dict-of-dicts to add to; modify this in-place
          test_results: test data with which to update category_dict, in a
              dict:
             {
               'category_name': 'category_value',
               'category_name': 'category_value',
               ...
             }
        """
        for category in CATEGORIES_TO_SUMMARIZE:
            category_value = test_results.get(category)
            if not category_value:
                # test_results did not include this category, keep going
                continue
            counts = category_dict.setdefault(category, {})
            counts[category_value] = counts.get(category_value, 0) + 1

    @staticmethod
    def _ensure_included_in_category_dict(category_dict,
                                          category_name, category_values):
        """Ensure that the category name/value pairs are included in
        category_dict, even if there aren't any results with that name/value
        pair.  Counts that are already present are left unchanged.
        (See documentation of self.get_results_of_type() for the format of
        this dictionary.)

        Args:
          category_dict: category dict-of-dicts to modify in-place
          category_name: category name, as a string
          category_values: list of values we want to make sure are represented
              for this category
        """
        counts = category_dict.setdefault(category_name, {})
        for category_value in category_values:
            counts.setdefault(category_value, 0)