| #!/usr/bin/python |
| |
| ''' |
| Copyright 2013 Google Inc. |
| |
| Use of this source code is governed by a BSD-style license that can be |
| found in the LICENSE file. |
| ''' |
| |
| ''' |
| Repackage expected/actual GM results as needed by our HTML rebaseline viewer. |
| ''' |
| |
| # System-level imports |
| import fnmatch |
| import json |
| import os |
| import re |
| import sys |
| |
| # Imports from within Skia |
| # |
| # We need to add the 'gm' directory, so that we can import gm_json.py within |
| # that directory. That script allows us to parse the actual-results.json file |
| # written out by the GM tool. |
| # Make sure that the 'gm' dir is in the PYTHONPATH, but add it at the *end* |
| # so any dirs that are already in the PYTHONPATH will be preferred. |
# GM_DIRECTORY is two levels up from this script: the directory that contains
# the directory this file lives in.
GM_DIRECTORY = os.path.dirname(
    os.path.dirname(os.path.realpath(__file__)))
# Append (rather than prepend) so that directories already on the path keep
# priority; this must happen before gm_json is imported below.
if GM_DIRECTORY not in sys.path:
    sys.path.append(GM_DIRECTORY)
import gm_json

# Compiled form of the image filename pattern supplied by gm_json.
IMAGE_FILENAME_RE = re.compile(gm_json.IMAGE_FILENAME_PATTERN)

# Keys of each per-test result dictionary for which value counts are
# accumulated.
CATEGORIES_TO_SUMMARIZE = [
    'builder',
    'test',
    'config',
    'resultType',
]
| |
class Results(object):
    """Loads actual and expected results from all builders, supplying combined
    reports as requested.
    """

    def __init__(self, actuals_root, expected_root):
        """Load every results file under both roots and combine them.

        params:
          actuals_root: root directory containing all actual-results.json
              files
          expected_root: root directory containing all expected-results.json
              files
        """
        self._actual_builder_dicts = Results._GetDictsFromRoot(actuals_root)
        self._expected_builder_dicts = Results._GetDictsFromRoot(expected_root)
        self._all_results = Results._Combine(
            actual_builder_dicts=self._actual_builder_dicts,
            expected_builder_dicts=self._expected_builder_dicts)

    def GetAll(self):
        """Return results of all tests, as a dictionary in this form:

        {
          "categories": # dictionary of categories listed in
                        # CATEGORIES_TO_SUMMARIZE, with the number of times
                        # each value appears within its category
          {
            "resultType": # category name
            {
              "failed": 29, # category value and total number found of that
                            # value
              "failure-ignored": 948,
              "no-comparison": 4502,
              "succeeded": 38609,
            },
            "builder":
            {
              "Test-Mac10.6-MacMini4.1-GeForce320M-x86-Debug": 1286,
              "Test-Mac10.6-MacMini4.1-GeForce320M-x86-Release": 1134,
              ...
            },
            ... # other categories from CATEGORIES_TO_SUMMARIZE
          }, # end of "categories" dictionary

          "testData": # list of test results, with a dictionary for each
          [
            {
              "builder": "Test-Mac10.6-MacMini4.1-GeForce320M-x86-Debug",
              "test": "bigmatrix",
              "config": "8888",
              "resultType": "failed",
              "expectedHashType": "bitmap-64bitMD5",
              "expectedHashDigest": "10894408024079689926",
              "actualHashType": "bitmap-64bitMD5",
              "actualHashDigest": "2409857384569",
            },
            ...
          ], # end of "testData" list
        }

        The returned object is the one built in __init__ (not a copy).
        """
        return self._all_results

    @staticmethod
    def _GetDictsFromRoot(root, pattern='*.json'):
        """Read all JSON dictionaries within a directory tree, returning them
        within a meta-dictionary (keyed by the builder name for each
        dictionary).

        The builder name is the basename of the directory holding the file.
        Directories whose name ends in '-Trybot' are skipped. If more than one
        file maps to the same builder name, the one loaded last wins.

        params:
          root: path to root of directory tree
          pattern: which files to read within root (fnmatch-style pattern)
        """
        meta_dict = {}
        for dirpath, _dirnames, filenames in os.walk(root):
            # The builder name depends only on the directory, so work it out
            # once per directory rather than once per matching file.
            builder = os.path.basename(dirpath)
            if builder.endswith('-Trybot'):
                continue
            for matching_filename in fnmatch.filter(filenames, pattern):
                fullpath = os.path.join(dirpath, matching_filename)
                meta_dict[builder] = gm_json.LoadFromFile(fullpath)
        return meta_dict

    @staticmethod
    def _FindExpectedImage(expected_builder_dicts, builder, image_name,
                           result_type):
        """Return the expected [hashType, hashDigest] pair for one image, or
        [None, None] if no expectation can be found.

        params:
          expected_builder_dicts: expectations dictionaries, keyed by builder
          builder: name of the builder that produced the actual result
          image_name: image filename to look up within the expectations
          result_type: result type recorded for this image in the actuals;
              only used to decide whether a missing expectation is worth a
              warning
        """
        try:
            # TODO(epoger): assumes a single allowed digest per test
            return (
                expected_builder_dicts
                [builder][gm_json.JSONKEY_EXPECTEDRESULTS]
                [image_name][gm_json.JSONKEY_EXPECTEDRESULTS_ALLOWEDDIGESTS]
                [0])
        except (KeyError, TypeError, IndexError):
            # IndexError covers an allowed-digests list that is present but
            # empty; like a missing entry, that means "no expectations".
            #
            # There are several cases in which we would expect to find
            # no expectations for a given test:
            #
            # 1. result_type == NOCOMPARISON
            #    There are no expectations for this test yet!
            #
            # 2. ignore-tests.txt
            #    If a test has been listed in ignore-tests.txt, then its
            #    status may show as FAILUREIGNORED even if it doesn't have
            #    any expectations yet.
            #
            # 3. alternate rendering mode failures (e.g. serialized)
            #    In cases like
            #    https://code.google.com/p/skia/issues/detail?id=1684
            #    ('tileimagefilter GM test failing in serialized render
            #    mode'), the gm-actuals will list a failure for the alternate
            #    rendering mode even though we don't have explicit
            #    expectations for the test (the implicit expectation is that
            #    it must render the same in all rendering modes).
            #
            # Don't log types 1 or 2, because they are common.
            # Log other types, because they are rare and we should know about
            # them, but don't throw an exception, because we need to keep our
            # tools working in the meanwhile!
            quiet_result_types = (
                gm_json.JSONKEY_ACTUALRESULTS_NOCOMPARISON,
                gm_json.JSONKEY_ACTUALRESULTS_FAILUREIGNORED)
            if result_type not in quiet_result_types:
                # Single-argument parenthesised form: prints the same text
                # under both Python 2 and Python 3.
                print('WARNING: No expectations found for test: %s' % {
                    'builder': builder,
                    'image_name': image_name,
                    'result_type': result_type,
                })
            return [None, None]

    @staticmethod
    def _Combine(actual_builder_dicts, expected_builder_dicts):
        """Gathers the results of all tests, across all builders (based on the
        contents of actual_builder_dicts and expected_builder_dicts)
        and returns it in a dictionary of the form documented in GetAll().

        Every result is counted in the "categories" summary, but results
        whose (possibly updated) type is "succeeded" are left out of
        "testData".

        This is a static method, because once we start refreshing results
        asynchronously, we need to make sure we are not corrupting the
        object's member variables.

        params:
          actual_builder_dicts: actual-results dictionaries, keyed by builder
          expected_builder_dicts: expectations dictionaries, keyed by builder
        """
        test_data = []
        category_dict = {}
        for builder in sorted(actual_builder_dicts):
            actual_results_for_this_builder = (
                actual_builder_dicts[builder][gm_json.JSONKEY_ACTUALRESULTS])
            for result_type in sorted(actual_results_for_this_builder):
                results_of_this_type = (
                    actual_results_for_this_builder[result_type])
                if not results_of_this_type:
                    continue
                for image_name in sorted(results_of_this_type):
                    actual_image = results_of_this_type[image_name]
                    expected_image = Results._FindExpectedImage(
                        expected_builder_dicts, builder, image_name,
                        result_type)

                    # If this test was recently rebaselined, it will remain
                    # in the "failed" set of actuals until all the bots have
                    # cycled (although the expectations have indeed been set
                    # from the most recent actuals).  Treat these as
                    # successes instead of failures.
                    #
                    # TODO(epoger): Do we need to do something similar in
                    # other cases, such as when we have recently marked a
                    # test as ignoreFailure but it still shows up in the
                    # "failed" category?  Maybe we should not rely on the
                    # result_type categories recorded within the gm_actuals
                    # AT ALL, and instead evaluate the result_type ourselves
                    # based on what we see in expectations vs actual
                    # checksum?
                    if expected_image == actual_image:
                        updated_result_type = (
                            gm_json.JSONKEY_ACTUALRESULTS_SUCCEEDED)
                    else:
                        updated_result_type = result_type

                    (test, config) = (
                        IMAGE_FILENAME_RE.match(image_name).groups())
                    results_for_this_test = {
                        "builder": builder,
                        "test": test,
                        "config": config,
                        "resultType": updated_result_type,
                        "actualHashType": actual_image[0],
                        "actualHashDigest": str(actual_image[1]),
                        "expectedHashType": expected_image[0],
                        "expectedHashDigest": str(expected_image[1]),
                    }
                    Results._AddToCategoryDict(
                        category_dict, results_for_this_test)

                    # TODO(epoger): For now, don't include succeeded results
                    # in the raw data.  There are so many of them that they
                    # make the client too slow.
                    if (updated_result_type !=
                            gm_json.JSONKEY_ACTUALRESULTS_SUCCEEDED):
                        test_data.append(results_for_this_test)
        return {"categories": category_dict, "testData": test_data}

    @staticmethod
    def _AddToCategoryDict(category_dict, test_results):
        """Add test_results to the category dictionary we are building
        (see documentation of self.GetAll() for the format of this
        dictionary).

        Only the categories named in CATEGORIES_TO_SUMMARIZE are counted; a
        category whose value is missing or falsy in test_results is skipped.

        params:
          category_dict: category dict-of-dicts to add to; modify this
              in-place
          test_results: test data with which to update category_dict, in a
              dict:
              {
                "category_name": "category_value",
                "category_name": "category_value",
                ...
              }
        """
        for category in CATEGORIES_TO_SUMMARIZE:
            category_value = test_results.get(category)
            if not category_value:
                continue  # test_results did not include this category
            value_counts = category_dict.setdefault(category, {})
            value_counts[category_value] = (
                value_counts.get(category_value, 0) + 1)