svndiff.py: add ability to compare before-and-after JSON files, not just raw images

This should complete step 3 of https://goto.google.com/ChecksumTransitionDetail !

R=borenet@google.com

Review URL: https://codereview.chromium.org/19112002

git-svn-id: http://skia.googlecode.com/svn/trunk@10113 2bbb7eff-a529-9590-31e7-b0007b416f81
diff --git a/gm/gm_json.py b/gm/gm_json.py
index 35b954e..44ec5ea 100644
--- a/gm/gm_json.py
+++ b/gm/gm_json.py
@@ -49,6 +49,30 @@
 # Allowed hash types for test expectations.
 JSONKEY_HASHTYPE_BITMAP_64BITMD5 = 'bitmap-64bitMD5'
 
+# Root directory where the buildbots store their actually-generated images...
+#  as a publicly readable HTTP URL:
+GM_ACTUALS_ROOT_HTTP_URL = (
+    'http://chromium-skia-gm.commondatastorage.googleapis.com/gm')
+#  as a GS URL that allows credential-protected write access:
+GM_ACTUALS_ROOT_GS_URL = 'gs://chromium-skia-gm/gm'
+
+# Pattern used to parse each image's filename  # matches (testname, config)
+IMAGE_FILENAME_PATTERN = '(\S+)_(\S+).png'  # matches (testname, config)
+
+def CreateGmActualUrl(test_name, hash_type, hash_digest,
+                      gm_actuals_root_url=GM_ACTUALS_ROOT_HTTP_URL):
+  """Return the URL we can use to download a particular version of
+  the actually-generated image for this particular GM test.
+
+  test_name: name of the test, e.g. 'perlinnoise'
+  hash_type: string indicating the hash type used to generate hash_digest,
+             e.g. JSONKEY_HASHTYPE_BITMAP_64BITMD5
+  hash_digest: the hash digest of the image to retrieve
+  gm_actuals_root_url: root url where actual images are stored
+  """
+  return '%s/%s/%s/%s.png' % (gm_actuals_root_url, hash_type, test_name,
+                              hash_digest)
+
 def LoadFromString(file_contents):
   """Loads the JSON summary written out by the GM tool.
      Returns a dictionary keyed by the values listed as JSONKEY_ constants
diff --git a/tools/jsondiff.py b/tools/jsondiff.py
index a1ca257..dd89c6d 100755
--- a/tools/jsondiff.py
+++ b/tools/jsondiff.py
@@ -11,6 +11,8 @@
 Gathers diffs between 2 JSON expectations files, or between actual and
 expected results within a single JSON actual-results file,
 and generates an old-vs-new diff dictionary.
+
+TODO(epoger): Fix indentation in this file (2-space indents, not 4-space).
 '''
 
 # System-level imports
@@ -158,18 +160,24 @@
         return self._DictionaryDiff(old_results, new_results)
 
 
-# main...
-parser = argparse.ArgumentParser()
-parser.add_argument('old',
-                    help='Path to JSON file whose expectations to display on ' +
-                    'the "old" side of the diff. This can be a filepath on ' +
-                    'local storage, or a URL.')
-parser.add_argument('new', nargs='?',
-                    help='Path to JSON file whose expectations to display on ' +
-                    'the "new" side of the diff; if not specified, uses the ' +
-                    'ACTUAL results from the "old" JSON file. This can be a ' +
-                    'filepath on local storage, or a URL.')
-args = parser.parse_args()
-differ = GMDiffer()
-diffs = differ.GenerateDiffDict(oldfile=args.old, newfile=args.new)
-json.dump(diffs, sys.stdout, sort_keys=True, indent=2)
+def _Main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        'old',
+        help='Path to JSON file whose expectations to display on ' +
+        'the "old" side of the diff. This can be a filepath on ' +
+        'local storage, or a URL.')
+    parser.add_argument(
+        'new', nargs='?',
+        help='Path to JSON file whose expectations to display on ' +
+        'the "new" side of the diff; if not specified, uses the ' +
+        'ACTUAL results from the "old" JSON file. This can be a ' +
+        'filepath on local storage, or a URL.')
+    args = parser.parse_args()
+    differ = GMDiffer()
+    diffs = differ.GenerateDiffDict(oldfile=args.old, newfile=args.new)
+    json.dump(diffs, sys.stdout, sort_keys=True, indent=2)
+
+
+if __name__ == '__main__':
+    _Main()
diff --git a/tools/rebaseline.py b/tools/rebaseline.py
index ec5c1c9..65f157b 100755
--- a/tools/rebaseline.py
+++ b/tools/rebaseline.py
@@ -11,6 +11,8 @@
 Rebaselines the given GM tests, on all bots and all configurations.
 Must be run from the gm-expected directory.  If run from a git or SVN
 checkout, the files will be added to the staging area for commit.
+
+TODO(epoger): Fix indentation in this file (2-space indents, not 4-space).
 '''
 
 # System-level imports
@@ -146,7 +148,7 @@
         self._actuals_filename = actuals_filename
         self._exception_handler = exception_handler
         self._add_new = add_new
-        self._testname_pattern = re.compile('(\S+)_(\S+).png')
+        self._image_filename_re = re.compile(gm_json.IMAGE_FILENAME_PATTERN)
 
     # Returns the full contents of filepath, as a single string.
     # If filepath looks like a URL, try to read it that way instead of as
@@ -230,7 +232,8 @@
         skipped_images = []
         if results_to_update:
             for (image_name, image_results) in results_to_update.iteritems():
-                (test, config) = self._testname_pattern.match(image_name).groups()
+                (test, config) = \
+                    self._image_filename_re.match(image_name).groups()
                 if self._tests:
                     if test not in self._tests:
                         skipped_images.append(image_name)
@@ -239,10 +242,11 @@
                     if config not in self._configs:
                         skipped_images.append(image_name)
                         continue
-                expectations_dict[gm_json.JSONKEY_EXPECTEDRESULTS] \
-                                 [image_name] \
-                                 [gm_json.JSONKEY_EXPECTEDRESULTS_ALLOWEDDIGESTS] = \
-                                     [image_results]
+                expectations_dict \
+                    [gm_json.JSONKEY_EXPECTEDRESULTS] \
+                    [image_name] \
+                    [gm_json.JSONKEY_EXPECTEDRESULTS_ALLOWEDDIGESTS] = \
+                        [image_results]
 
         # Write out updated expectations.
         gm_json.WriteToFile(expectations_dict, expectations_json_filepath)
diff --git a/tools/rebaseline_imagefiles.py b/tools/rebaseline_imagefiles.py
index 38594ba..4662407 100755
--- a/tools/rebaseline_imagefiles.py
+++ b/tools/rebaseline_imagefiles.py
@@ -16,6 +16,8 @@
 
 There is a lot of code duplicated between here and rebaseline.py, but
 that's fine because we will delete this file soon.
+
+TODO(epoger): Fix indentation in this file (2-space indents, not 4-space).
 '''
 
 # System-level imports
@@ -77,9 +79,7 @@
         self._dry_run = dry_run
         self._add_new = add_new
         self._missing_json_is_fatal = missing_json_is_fatal
-        self._googlestorage_gm_actuals_root = (
-            'http://chromium-skia-gm.commondatastorage.googleapis.com/gm')
-        self._testname_pattern = re.compile('(\S+)_(\S+).png')
+        self._image_filename_re = re.compile(gm_json.IMAGE_FILENAME_PATTERN)
         self._is_svn_checkout = (
             os.path.exists(os.path.join(expectations_root, '.svn')) or
             os.path.exists(os.path.join(expectations_root, os.pardir, '.svn')))
@@ -101,7 +101,7 @@
     # Download a single actual result from GoogleStorage.
     # Raises an exception if it fails.
     def _DownloadFromGoogleStorage(self, infilename, outfilename, all_results):
-        test_name = self._testname_pattern.match(infilename).group(1)
+        test_name = self._image_filename_re.match(infilename).group(1)
         if not test_name:
             raise Exception('unable to find test_name for infilename %s' %
                             infilename)
@@ -114,8 +114,8 @@
             raise Exception(
                 'ValueError reading filename %s from all_results dict: %s' % (
                     infilename, e))
-        url = '%s/%s/%s/%s.png' % (self._googlestorage_gm_actuals_root,
-                                   hash_type, test_name, hash_value)
+        url = gm_json.CreateGmActualUrl(
+            test_name=test_name, hash_type=hash_type, hash_digest=hash_value)
         try:
             self._DownloadFile(source_url=url, dest_filename=outfilename)
         except CommandFailedException:
@@ -270,7 +270,7 @@
                                                add_new=self._add_new)
         skipped_files = []
         for filename in filenames:
-            (test, config) = self._testname_pattern.match(filename).groups()
+            (test, config) = self._image_filename_re.match(filename).groups()
             if self._tests:
                 if test not in self._tests:
                     skipped_files.append(filename)
diff --git a/tools/svndiff.py b/tools/svndiff.py
index 4759bed..626a1b9 100755
--- a/tools/svndiff.py
+++ b/tools/svndiff.py
@@ -1,24 +1,45 @@
 #!/usr/bin/python
 '''
-Generates a visual diff of all pending changes in the local SVN checkout.
-
-Launch with --help to see more information.
-
-
 Copyright 2012 Google Inc.
 
 Use of this source code is governed by a BSD-style license that can be
 found in the LICENSE file.
 '''
 
+'''
+Generates a visual diff of all pending changes in the local SVN checkout.
+
+Launch with --help to see more information.
+
+TODO(epoger): Fix indentation in this file (2-space indents, not 4-space).
+'''
+
 # common Python modules
 import optparse
 import os
 import re
 import shutil
+import sys
 import tempfile
+import urllib2
 
-# modules declared within this same directory
+# Imports from within Skia
+#
+# We need to add the 'gm' directory, so that we can import gm_json.py within
+# that directory.  That script allows us to parse the actual-results.json file
+# written out by the GM tool.
+# Make sure that the 'gm' dir is in the PYTHONPATH, but add it at the *end*
+# so any dirs that are already in the PYTHONPATH will be preferred.
+#
+# This assumes that the 'gm' directory has been checked out as a sibling of
+# the 'tools' directory containing this script, which will be the case if
+# 'trunk' was checked out as a single unit.
+GM_DIRECTORY = os.path.realpath(
+    os.path.join(os.path.dirname(os.path.dirname(__file__)), 'gm'))
+if GM_DIRECTORY not in sys.path:
+    sys.path.append(GM_DIRECTORY)
+import gm_json
+import jsondiff
 import svn
 
 USAGE_STRING = 'Usage: %s [options]'
@@ -32,12 +53,13 @@
 
 '''
 
+IMAGE_FILENAME_RE = re.compile(gm_json.IMAGE_FILENAME_PATTERN)
+
 TRUNK_PATH = os.path.join(os.path.dirname(__file__), os.pardir)
 
 OPTION_DEST_DIR = '--dest-dir'
-# default DEST_DIR is determined at runtime
 OPTION_PATH_TO_SKDIFF = '--path-to-skdiff'
-# default PATH_TO_SKDIFF is determined at runtime
+OPTION_SOURCE_DIR = '--source-dir'
 
 def RunCommand(command):
     """Run a command, raising an exception if it fails.
@@ -71,16 +93,77 @@
                     'specify the %s option or build skdiff?' % (
                         possible_paths, OPTION_PATH_TO_SKDIFF))
 
-def SvnDiff(path_to_skdiff, dest_dir):
-    """Generates a visual diff of all pending changes in the local SVN checkout.
+def _DownloadUrlToFile(source_url, dest_path):
+    """Download source_url, and save its contents to dest_path.
+    Raises an exception if there were any problems."""
+    reader = urllib2.urlopen(source_url)
+    writer = open(dest_path, 'wb')
+    writer.write(reader.read())
+    writer.close()
+
+def _CreateGSUrl(imagename, hash_type, hash_digest):
+    """Return the HTTP URL we can use to download this particular version of
+    the actually-generated GM image with this imagename.
+
+    imagename: name of the test image, e.g. 'perlinnoise_msaa4.png'
+    hash_type: string indicating the hash type used to generate hash_digest,
+               e.g. gm_json.JSONKEY_HASHTYPE_BITMAP_64BITMD5
+    hash_digest: the hash digest of the image to retrieve
+    """
+    return gm_json.CreateGmActualUrl(
+        test_name=IMAGE_FILENAME_RE.match(imagename).group(1),
+        hash_type=hash_type,
+        hash_digest=hash_digest)
+
+def _CallJsonDiff(old_json_path, new_json_path,
+                  old_flattened_dir, new_flattened_dir,
+                  filename_prefix):
+    """Using jsondiff.py, write the images that differ between two GM
+    expectations summary files (old and new) into old_flattened_dir and
+    new_flattened_dir.
+
+    filename_prefix: prefix to prepend to filenames of all images we write
+        into the flattened directories
+    """
+    json_differ = jsondiff.GMDiffer()
+    diff_dict = json_differ.GenerateDiffDict(oldfile=old_json_path,
+                                             newfile=new_json_path)
+    for (imagename, results) in diff_dict.iteritems():
+        old_checksum = results['old']
+        new_checksum = results['new']
+        # TODO(epoger): Currently, this assumes that all images have been
+        # checksummed using gm_json.JSONKEY_HASHTYPE_BITMAP_64BITMD5
+        old_image_url = _CreateGSUrl(
+            imagename=imagename,
+            hash_type=gm_json.JSONKEY_HASHTYPE_BITMAP_64BITMD5,
+            hash_digest=old_checksum)
+        new_image_url = _CreateGSUrl(
+            imagename=imagename,
+            hash_type=gm_json.JSONKEY_HASHTYPE_BITMAP_64BITMD5,
+            hash_digest=new_checksum)
+        _DownloadUrlToFile(
+            source_url=old_image_url,
+            dest_path=os.path.join(old_flattened_dir,
+                                   filename_prefix + imagename))
+        _DownloadUrlToFile(
+            source_url=new_image_url,
+            dest_path=os.path.join(new_flattened_dir,
+                                   filename_prefix + imagename))
+
+def SvnDiff(path_to_skdiff, dest_dir, source_dir):
+    """Generates a visual diff of all pending changes in source_dir.
 
     @param path_to_skdiff
     @param dest_dir existing directory within which to write results
+    @param source_dir
     """
     # Validate parameters, filling in default values if necessary and possible.
-    path_to_skdiff = FindPathToSkDiff(path_to_skdiff)
+    path_to_skdiff = os.path.abspath(FindPathToSkDiff(path_to_skdiff))
     if not dest_dir:
         dest_dir = tempfile.mkdtemp()
+    dest_dir = os.path.abspath(dest_dir)
+
+    os.chdir(source_dir)
 
     # Prepare temporary directories.
     modified_flattened_dir = os.path.join(dest_dir, 'modified_flattened')
@@ -100,14 +183,29 @@
     # 1. copy its current contents into modified_flattened_dir
     # 2. copy its original contents into original_flattened_dir
     for modified_file_path in modified_file_paths:
-        dest_filename = re.sub(os.sep, '__', modified_file_path)
-        # If the file had STATUS_DELETED, it won't exist anymore...
-        if os.path.isfile(modified_file_path):
-            shutil.copyfile(modified_file_path,
-                            os.path.join(modified_flattened_dir, dest_filename))
-        svn_repo.ExportBaseVersionOfFile(
-            modified_file_path,
-            os.path.join(original_flattened_dir, dest_filename))
+        if modified_file_path.endswith('.json'):
+            # Special handling for JSON files, in the hopes that they
+            # contain GM result summaries.
+            (_unused, original_file_path) = tempfile.mkstemp()
+            svn_repo.ExportBaseVersionOfFile(modified_file_path,
+                                             original_file_path)
+            platform_prefix = re.sub(os.sep, '__',
+                                     os.path.dirname(modified_file_path)) + '__'
+            _CallJsonDiff(old_json_path=original_file_path,
+                          new_json_path=modified_file_path,
+                          old_flattened_dir=original_flattened_dir,
+                          new_flattened_dir=modified_flattened_dir,
+                          filename_prefix=platform_prefix)
+            os.remove(original_file_path)
+        else:
+            dest_filename = re.sub(os.sep, '__', modified_file_path)
+            # If the file had STATUS_DELETED, it won't exist anymore...
+            if os.path.isfile(modified_file_path):
+                shutil.copyfile(modified_file_path,
+                                os.path.join(modified_flattened_dir, dest_filename))
+            svn_repo.ExportBaseVersionOfFile(
+                modified_file_path,
+                os.path.join(original_flattened_dir, dest_filename))
 
     # Run skdiff: compare original_flattened_dir against modified_flattened_dir
     RunCommand('%s %s %s %s' % (path_to_skdiff, original_flattened_dir,
@@ -124,7 +222,8 @@
     num_args = len(args)
     if num_args != 0:
         RaiseUsageException()
-    SvnDiff(path_to_skdiff=options.path_to_skdiff, dest_dir=options.dest_dir)
+    SvnDiff(path_to_skdiff=options.path_to_skdiff, dest_dir=options.dest_dir,
+            source_dir=options.source_dir)
 
 if __name__ == '__main__':
     parser = optparse.OptionParser(USAGE_STRING % '%prog' + HELP_STRING)
@@ -138,5 +237,9 @@
                       help='path to already-built skdiff tool; if not set, '
                       'will search for it in typical directories near this '
                       'script')
+    parser.add_option(OPTION_SOURCE_DIR,
+                      action='store', type='string', default='.',
+                      help='root directory within which to compare all ' +
+                      'files; defaults to "%default"')
     (options, args) = parser.parse_args()
     Main(options, args)
diff --git a/tools/verify_images_for_gm_results.py b/tools/verify_images_for_gm_results.py
index 0eb87d2..28beb59 100644
--- a/tools/verify_images_for_gm_results.py
+++ b/tools/verify_images_for_gm_results.py
@@ -16,6 +16,7 @@
 import sys
 
 
+# TODO(borenet): Replace some/all of these with constants from gm/gm_json.py
 AUTOGEN_URL = 'http://skia-autogen.googlecode.com/svn/gm-actual'
 GS_URL = 'gs://chromium-skia-gm/gm'
 TEST_NAME_PATTERN = re.compile('(\S+)_(\S+).png')