download and rebaseline images using server

BUG=
R=epoger@google.com

Review URL: https://codereview.chromium.org/20654006

git-svn-id: http://skia.googlecode.com/svn/trunk@10607 2bbb7eff-a529-9590-31e7-b0007b416f81
diff --git a/tools/skpdiff/skpdiff_server.py b/tools/skpdiff/skpdiff_server.py
index 79ffc83..1403540 100755
--- a/tools/skpdiff/skpdiff_server.py
+++ b/tools/skpdiff/skpdiff_server.py
@@ -8,6 +8,7 @@
 import os
 import os.path
 import re
+import subprocess
 import sys
 import tempfile
 import urllib2
@@ -99,7 +100,7 @@
     @param hash_val   The hash value of the image.
     """
 
-    # Seperate the test name from a image name
+    # Separate the test name from a image name
     image_match = IMAGE_FILENAME_RE.match(image_name)
     test_name = image_match.group(1)
 
@@ -111,71 +112,6 @@
     download_file(image_url, image_path)
 
 
-def download_changed_images(expectations_dir, expected_name, updated_name,
-                            expected_image_dir, actual_image_dir):
-
-    """Download the expected and actual GMs that changed into the given paths.
-    Determining what changed will be done by comparing each expected_name JSON
-    results file to its corresponding updated_name JSON results file if it
-    exists.
-
-    @param expectations_dir   The directory to traverse for results files. This
-           should resmble expectations/gm in the Skia trunk.
-    @param expected_name      The name of the expected result files. These are
-           in the format of expected-results.json.
-    @param updated_name       The name of the updated expected result files.
-           Normally this matches --expectations-filename-output for the
-           rebaseline.py tool.
-    @param expected_image_dir The directory to place downloaded expected images
-           into.
-    @param actual_image_dir   The directory to place downloaded actual images
-           into.
-    """
-
-    differ = jsondiff.GMDiffer()
-
-    # Look through expectations for hashes that changed
-    for root, dirs, files in os.walk(expectations_dir):
-        for expectation_file in files:
-            # There are many files in the expectations directory. We only care
-            # about expected results.
-            if expectation_file != expected_name:
-                continue
-
-            # Get the name of the results file, and be sure there is an updated
-            # result to compare against. If there is not, there is no point in
-            # diffing this device.
-            expected_file_path = os.path.join(root, expected_name)
-            updated_file_path = os.path.join(root, updated_name)
-            if not os.path.isfile(updated_file_path):
-                continue
-
-            # Find all expectations that did not match.
-            expected_diff = differ.GenerateDiffDict(expected_file_path,
-                                                    updated_file_path)
-
-            # The name of the device corresponds to the name of the folder we
-            # are in.
-            device_name = os.path.basename(root)
-
-            # Create name prefixes to store the devices old and new GM results
-            expected_image_prefix = os.path.join(expected_image_dir,
-                                                 device_name) + '-'
-
-            actual_image_prefix = os.path.join(actual_image_dir,
-                                               device_name) + '-'
-
-            # Download each image that had a differing result
-            for image_name, hashes in expected_diff.iteritems():
-                print('Downloading', image_name, 'for device', device_name)
-                download_gm_image(image_name,
-                                  expected_image_prefix + image_name,
-                                  hashes['old'])
-                download_gm_image(image_name,
-                                  actual_image_prefix + image_name,
-                                  hashes['new'])
-
-
 def get_image_set_from_skpdiff(skpdiff_records):
     """Get the set of all images references in the given records.
 
@@ -186,6 +122,249 @@
     return expected_set | actual_set
 
 
+def set_expected_hash_in_json(expected_results_json, image_name, hash_value):
+    """Set the expected hash for the object extracted from
+    expected-results.json. Note that this only work with bitmap-64bitMD5 hash
+    types.
+
+    @param expected_results_json The Python dictionary with the results to
+    modify.
+    @param image_name            The name of the image to set the hash of.
+    @param hash_value            The hash to set for the image.
+    """
+    expected_results = expected_results_json[gm_json.JSONKEY_EXPECTEDRESULTS]
+
+    if image_name in expected_results:
+        expected_results[image_name][gm_json.JSONKEY_EXPECTEDRESULTS_ALLOWEDDIGESTS][0][1] = hash_value
+    else:
+        expected_results[image_name] = {
+            gm_json.JSONKEY_EXPECTEDRESULTS_ALLOWEDDIGESTS:
+            [
+                [
+                    gm_json.JSONKEY_HASHTYPE_BITMAP_64BITMD5,
+                    hash_value
+                ]
+            ]
+        }
+
+
+def get_head_version(path):
+    """Get the version of the file at the given path stored inside the HEAD of
+    the git repository. It is returned as a string.
+
+    @param path The path of the file whose HEAD is returned. It is assumed the
+    path is inside a git repo rooted at SKIA_ROOT_DIR.
+    """
+
+    # git-show will not work with absolute paths. This ensures we give it a path
+    # relative to the skia root.
+    git_path = os.path.relpath(path, SKIA_ROOT_DIR)
+    git_show_proc = subprocess.Popen(['git', 'show', 'HEAD:' + git_path],
+                                     stdout=subprocess.PIPE)
+
+    # When invoked outside a shell, git will output the last committed version
+    # of the file directly to stdout.
+    git_version_content, _ = git_show_proc.communicate()
+    return git_version_content
+
+
+class GMInstance:
+    """Information about a GM test result on a specific device:
+     - device_name = the name of the device that rendered it
+     - image_name = the GM test name and config
+     - expected_hash = the current expected hash value
+     - actual_hash = the actual hash value
+    """
+    def __init__(self,
+                 device_name, image_name,
+                 expected_hash, actual_hash):
+        self.device_name = device_name
+        self.image_name = image_name
+        self.expected_hash = expected_hash
+        self.actual_hash = actual_hash
+
+
+class ExpectationsManager:
+    def __init__(self, expectations_dir, expected_name, updated_name,
+                 skpdiff_path):
+        """
+        @param expectations_dir   The directory to traverse for results files.
+               This should resemble expectations/gm in the Skia trunk.
+        @param expected_name      The name of the expected result files. These
+               are in the format of expected-results.json.
+        @param updated_name       The name of the updated expected result files.
+               Normally this matches --expectations-filename-output for the
+               rebaseline.py tool.
+        @param skpdiff_path       The path used to execute the skpdiff command.
+        """
+        self._expectations_dir = expectations_dir
+        self._expected_name = expected_name
+        self._updated_name = updated_name
+        self._skpdiff_path = skpdiff_path
+        self._generate_gm_comparison()
+
+    def _generate_gm_comparison(self):
+        """Generate all the data needed to compare GMs:
+           - determine which GMs changed
+           - download the changed images
+           - compare them with skpdiff
+        """
+
+        # Get the expectations and compare them with actual hashes
+        self._get_expectations()
+
+
+        # Create a temporary file tree that makes sense for skpdiff to operate
+        # on.
+        image_output_dir = tempfile.mkdtemp('skpdiff')
+        expected_image_dir = os.path.join(image_output_dir, 'expected')
+        actual_image_dir = os.path.join(image_output_dir, 'actual')
+        os.mkdir(expected_image_dir)
+        os.mkdir(actual_image_dir)
+
+        # Download expected and actual images that differed into the temporary
+        # file tree.
+        self._download_expectation_images(expected_image_dir, actual_image_dir)
+
+        # Invoke skpdiff with our downloaded images and place its results in the
+        # temporary directory.
+        self.skpdiff_output_path = os.path.join(image_output_dir,
+                                                'skpdiff_output.json')
+        skpdiff_cmd = SKPDIFF_INVOKE_FORMAT.format(self._skpdiff_path,
+                                                   self.skpdiff_output_path,
+                                                   expected_image_dir,
+                                                   actual_image_dir)
+        os.system(skpdiff_cmd)
+
+
+    def _get_expectations(self):
+        """Fills self._expectations with GMInstance objects for each test whose
+        expectation is different between the following two files:
+         - the local filesystem's updated results file
+         - git's head version of the expected results file
+        """
+        differ = jsondiff.GMDiffer()
+        self._expectations = []
+        for root, dirs, files in os.walk(self._expectations_dir):
+            for expectation_file in files:
+                # There are many files in the expectations directory. We only
+                # care about expected results.
+                if expectation_file != self._expected_name:
+                    continue
+
+                # Get the name of the results file, and be sure there is an
+                # updated result to compare against. If there is not, there is
+                # no point in diffing this device.
+                expected_file_path = os.path.join(root, self._expected_name)
+                updated_file_path = os.path.join(root, self._updated_name)
+                if not os.path.isfile(updated_file_path):
+                    continue
+
+                # Always get the expected results from git because we may have
+                # changed them in a previous instance of the server.
+                expected_contents = get_head_version(expected_file_path)
+                updated_contents = None
+                with open(updated_file_path, 'rb') as updated_file:
+                    updated_contents = updated_file.read()
+
+                # Find all expectations that did not match.
+                expected_diff = differ.GenerateDiffDictFromStrings(
+                    expected_contents,
+                    updated_contents)
+
+                # The name of the device corresponds to the name of the folder
+                # we are in.
+                device_name = os.path.basename(root)
+
+                # Store old and new versions of the expectation for each GM
+                for image_name, hashes in expected_diff.iteritems():
+                    self._expectations.append(
+                        GMInstance(device_name, image_name,
+                                   hashes['old'], hashes['new']))
+
+
+    def _download_expectation_images(self, expected_image_dir, actual_image_dir):
+        """Download the expected and actual images for the _expectations array.
+
+        @param expected_image_dir The directory to download expected images
+               into.
+        @param actual_image_dir   The directory to download actual images into.
+        """
+        image_map = {}
+
+        # Look through expectations and download their images.
+        for expectation in self._expectations:
+            # Build appropriate paths to download the images into.
+            expected_image_path = os.path.join(expected_image_dir,
+                                               expectation.device_name + '-' +
+                                               expectation.image_name)
+
+            actual_image_path = os.path.join(actual_image_dir,
+                                             expectation.device_name + '-' +
+                                             expectation.image_name)
+
+            print('Downloading %s for device %s' % (
+                expectation.image_name, expectation.device_name))
+
+            # Download images
+            download_gm_image(expectation.image_name,
+                              expected_image_path,
+                              expectation.expected_hash)
+
+            download_gm_image(expectation.image_name,
+                              actual_image_path,
+                              expectation.actual_hash)
+
+            # Annotate the expectations with where the images were downloaded
+            # to.
+            expectation.expected_image_path = expected_image_path
+            expectation.actual_image_path = actual_image_path
+
+            # Map the image paths back to the expectations.
+            image_map[expected_image_path] = (False, expectation)
+            image_map[actual_image_path] = (True, expectation)
+
+        self.image_map = image_map
+
+    def _set_expected_hash(self, device_name, image_name, hash_value):
+        """Set the expected hash for the image of the given device. This always
+        writes directly to the expected results file of the given device
+
+        @param device_name The name of the device to write the hash to.
+        @param image_name  The name of the image whose hash to set.
+        @param hash_value  The value of the hash to set.
+        """
+
+        # Retrieve the expected results file as it is in the working tree
+        json_path = os.path.join(self._expectations_dir, device_name,
+                                 self._expected_name)
+        expectations = gm_json.LoadFromFile(json_path)
+
+        # Set the specified hash.
+        set_expected_hash_in_json(expectations, image_name, hash_value)
+
+        # Write it out to disk using gm_json to keep the formatting consistent.
+        gm_json.WriteToFile(expectations, json_path)
+
+    def use_hash_of(self, image_path):
+        """Determine the hash of the image at the path using the records, and
+        set it as the expected hash for its device and image config.
+
+        @param image_path The path of the image as it was stored in the output
+               of skpdiff_path
+        """
+
+        # Get the metadata about the image at the path.
+        is_actual, expectation = self.image_map[image_path]
+
+        expectation_hash = expectation.actual_hash if is_actual else\
+                           expectation.expected_hash
+
+        # Write out that image's hash directly to the expected results file.
+        self._set_expected_hash(expectation.device_name, expectation.image_name,
+                                expectation_hash)
+
+
 class SkPDiffHandler(BaseHTTPServer.BaseHTTPRequestHandler):
     def send_file(self, file_path):
         # Grab the extension if there is one
@@ -211,7 +390,6 @@
         # under the dir_path. This is to prevent accidentally serving files
         # outside the directory intended using symlinks, or '../'.
         real_path = os.path.normpath(os.path.join(dir_path, file_path))
-        print(repr(real_path))
         if os.path.commonprefix([real_path, dir_path]) == dir_path:
             if os.path.isfile(real_path):
                 self.send_file(real_path)
@@ -248,12 +426,26 @@
         # If no file to send was found, just give the standard 404
         self.send_error(404)
 
+    def do_POST(self):
+        if self.path == '/set_hash':
+            content_length = int(self.headers['Content-length'])
+            request_data = json.loads(self.rfile.read(content_length))
+            self.server.expectations_manager.use_hash_of(request_data['path'])
+            self.send_response(200)
+            self.send_header('Content-type', 'application/json')
+            self.end_headers()
+            self.wfile.write('{"success":true}')
+            return
 
-def run_server(skpdiff_output_path, port=8080):
+        # If the we have no handler for this path, give em' the 404
+        self.send_error(404)
+
+
+def run_server(expectations_manager, port=8080):
     # Preload the skpdiff results file. This is so we can perform some
     # processing on it.
     skpdiff_output_json = ''
-    with open(skpdiff_output_path, 'rb') as records_file:
+    with open(expectations_manager.skpdiff_output_path, 'rb') as records_file:
         skpdiff_output_json = records_file.read()
 
     # It's important to parse the results file so that we can make a set of
@@ -264,7 +456,8 @@
     # Add JSONP padding to the JSON because the web page expects it. It expects
     # it because it was designed to run with or without a web server. Without a
     # web server, the only way to load JSON is with JSONP.
-    skpdiff_output_json = 'var SkPDiffRecords = ' + skpdiff_output_json
+    skpdiff_output_json = ('var SkPDiffRecords = ' +
+                           json.dumps({'records': skpdiff_records}) + ';')
 
     # Do not bind to interfaces other than localhost because the server will
     # attempt to serve files relative to the root directory as a last resort
@@ -274,7 +467,8 @@
     http_server = BaseHTTPServer.HTTPServer(server_address, SkPDiffHandler)
     http_server.image_set = image_set
     http_server.skpdiff_output_json = skpdiff_output_json
-    print('Navigate thine browser to: http://{}:{}'.format(*server_address))
+    http_server.expectations_manager = expectations_manager
+    print('Navigate thine browser to: http://{}:{}/'.format(*server_address))
     http_server.serve_forever()
 
 
@@ -320,39 +514,19 @@
     if skpdiff_path is None:
         sys.exit(1)
 
-    # Create a temporary file tree that makes sense for skpdiff.to operate on
-    image_output_dir = tempfile.mkdtemp('skpdiff')
-    expected_image_dir = os.path.join(image_output_dir, 'expected')
-    actual_image_dir = os.path.join(image_output_dir, 'actual')
-    os.mkdir(expected_image_dir)
-    os.mkdir(actual_image_dir)
-
     # Print out the paths of things for easier debugging
     print('script dir         :', SCRIPT_DIR)
     print('tools dir          :', TOOLS_DIR)
     print('root dir           :', SKIA_ROOT_DIR)
     print('expectations dir   :', args['expectations_dir'])
     print('skpdiff path       :', skpdiff_path)
-    print('tmp dir            :', image_output_dir)
-    print('expected image dir :', expected_image_dir)
-    print('actual image dir   :', actual_image_dir)
 
-    # Download expected and actual images that differed into the temporary file
-    # tree.
-    download_changed_images(args['expectations_dir'],
-                            args['expected'],   args['updated'],
-                            expected_image_dir, actual_image_dir)
+    expectations_manager = ExpectationsManager(args['expectations_dir'],
+                                               args['expected'],
+                                               args['updated'],
+                                               skpdiff_path)
 
-    # Invoke skpdiff with our downloaded images and place its results in the
-    # temporary directory.
-    skpdiff_output_path = os.path.join(image_output_dir, 'skpdiff_output.json')
-    skpdiff_cmd = SKPDIFF_INVOKE_FORMAT.format(skpdiff_path,
-                                               skpdiff_output_path,
-                                               expected_image_dir,
-                                               actual_image_dir)
-    os.system(skpdiff_cmd)
-
-    run_server(skpdiff_output_path, port=args['port'])
+    run_server(expectations_manager, port=args['port'])
 
 if __name__ == '__main__':
     main()