rebaseline_server: add ImagePair class, a step towards new intermediate JSON schema

See https://goto.google.com/ChangingRbsJson and bug 1919 for additional context

BUG=skia:1919
NOTRY=True
R=rmistry@google.com

Author: epoger@google.com

Review URL: https://codereview.chromium.org/157593006

git-svn-id: http://skia.googlecode.com/svn/trunk@13385 2bbb7eff-a529-9590-31e7-b0007b416f81
diff --git a/gm/rebaseline_server/imagediffdb.py b/gm/rebaseline_server/imagediffdb.py
index 3a2ce63..936301e 100644
--- a/gm/rebaseline_server/imagediffdb.py
+++ b/gm/rebaseline_server/imagediffdb.py
@@ -12,6 +12,7 @@
 import contextlib
 import logging
 import os
+import re
 import shutil
 import urllib
 try:
@@ -23,6 +24,8 @@
 DEFAULT_IMAGE_SUFFIX = '.png'
 DEFAULT_IMAGES_SUBDIR = 'images'
 
+DISALLOWED_FILEPATH_CHAR_REGEX = re.compile(r'[^\w\-]')
+
 DIFFS_SUBDIR = 'diffs'
 WHITEDIFFS_SUBDIR = 'whitediffs'
 
@@ -61,6 +64,9 @@
       actual_images_subdir: the subdirectory actual images are stored in.
       image_suffix: the suffix of images.
     """
+    expected_image_locator = _sanitize_locator(expected_image_locator)
+    actual_image_locator = _sanitize_locator(actual_image_locator)
+
     # Download the expected/actual images, if we don't have them already.
     # TODO(rmistry): Add a parameter that makes _download_and_open_image raise
     # an exception if images are not found locally (instead of trying to
@@ -132,6 +138,16 @@
     for each R/G/B channel, as a list."""
     return self._max_diff_per_channel
 
+  def as_dict(self):
+    """Returns a dictionary of this DiffRecord's metrics, with camelCase keys,
+    as needed when constructing the JSON representation."""
+    return {
+        'numDifferingPixels': self._num_pixels_differing,
+        'percentDifferingPixels': self.get_percent_pixels_differing(),
+        'weightedDiffMeasure': self.get_weighted_diff_measure(),
+        'maxDiffPerChannel': self._max_diff_per_channel,  # list, per R/G/B
+    }
+
 
 class ImageDiffDB(object):
   """ Calculates differences between image pairs, maintaining a database of
@@ -174,6 +190,8 @@
           actual image within storage_root (probably including a checksum to
           guarantee uniqueness)
     """
+    expected_image_locator = _sanitize_locator(expected_image_locator)
+    actual_image_locator = _sanitize_locator(actual_image_locator)
     key = (expected_image_locator, actual_image_locator)
     if not key in self._diff_dict:
       try:
@@ -193,7 +211,8 @@
 
     Raises a KeyError if we don't have a DiffRecord for this image pair.
     """
-    key = (expected_image_locator, actual_image_locator)
+    key = (_sanitize_locator(expected_image_locator),
+           _sanitize_locator(actual_image_locator))
     return self._diff_dict[key]
 
 
@@ -322,6 +341,15 @@
   if not os.path.isdir(path):
     os.makedirs(path)
 
+def _sanitize_locator(locator):
+  """Returns a filename-safe version of a locator: every character other than
+  a word character or '-' is replaced by '_'.
+
+  Args:
+    locator: string, or something that can be represented as a string
+  """
+  return re.sub(DISALLOWED_FILEPATH_CHAR_REGEX, '_', str(locator))
+
 def _get_difference_locator(expected_image_locator, actual_image_locator):
   """Returns the locator string used to look up the diffs between expected_image
   and actual_image.
@@ -330,7 +358,8 @@
     expected_image_locator: locator string pointing at expected image
     actual_image_locator: locator string pointing at actual image
 
-  Returns: locator where the diffs between expected and actual images can be
-      found
+  Returns: already-sanitized locator where the diffs between expected and
+      actual images can be found
   """
-  return "%s-vs-%s" % (expected_image_locator, actual_image_locator)
+  return "%s-vs-%s" % (_sanitize_locator(expected_image_locator),
+                       _sanitize_locator(actual_image_locator))
diff --git a/gm/rebaseline_server/imagediffdb_test.py b/gm/rebaseline_server/imagediffdb_test.py
index b76a2c0..558a816 100755
--- a/gm/rebaseline_server/imagediffdb_test.py
+++ b/gm/rebaseline_server/imagediffdb_test.py
@@ -26,6 +26,7 @@
 
   def setUp(self):
     self._temp_dir = tempfile.mkdtemp()
+    self.maxDiff = None  # never truncate the diff shown by assertEqual
 
   def tearDown(self):
     shutil.rmtree(self._temp_dir)
@@ -34,7 +35,20 @@
     """Tell unittest framework to not print docstrings for test cases."""
     return None
 
+  def test_sanitize_locator(self):
+    """Test _sanitize_locator() against (locator, sanitized result) pairs."""
+    expectations = [('simple', 'simple'), (1234, '1234'),
+                    ('one/two', 'one_two'), ('one\\two', 'one_two'),
+                    ('one_two', 'one_two')]
+    for locator, sanitized in expectations:
+      self.assertEqual(imagediffdb._sanitize_locator(locator), sanitized)
+
   def test_simple(self):
+    """Test ImageDiffDB, downloading real known images from Google Storage.
+
+    TODO(epoger): Instead of hitting Google Storage, we should read image
+    files from local disk using a file:// IMG_URL_BASE.
+    """
     # params for each self-test:
     # 0. expected image locator
     # 1. expected image URL
@@ -45,16 +59,16 @@
     # 6. expected max_diff_per_channel
     selftests = [
         [
-            '16206093933823793653',
+            'arcofzorro/16206093933823793653',
             IMG_URL_BASE + 'arcofzorro/16206093933823793653.png',
-            '13786535001616823825',
+            'arcofzorro/13786535001616823825',
             IMG_URL_BASE + 'arcofzorro/13786535001616823825.png',
             '0.0662', '0.0113', [255, 255, 247],
         ],
         [
-            '10552995703607727960',
+            'gradients_degenerate_2pt/10552995703607727960',
             IMG_URL_BASE + 'gradients_degenerate_2pt/10552995703607727960.png',
-            '11198253335583713230',
+            'gradients_degenerate_2pt/11198253335583713230',
             IMG_URL_BASE + 'gradients_degenerate_2pt/11198253335583713230.png',
             '100.0000', '66.6667', [255, 0, 255],
         ],
diff --git a/gm/rebaseline_server/imagepair.py b/gm/rebaseline_server/imagepair.py
new file mode 100644
index 0000000..1c71bd9
--- /dev/null
+++ b/gm/rebaseline_server/imagepair.py
@@ -0,0 +1,83 @@
+#!/usr/bin/python
+
+"""
+Copyright 2014 Google Inc.
+
+Use of this source code is governed by a BSD-style license that can be
+found in the LICENSE file.
+
+ImagePair class (see class docstring for details)
+"""
+
+import posixpath
+
+# Keys used within ImagePair dictionary representations.
+KEY_DIFFERENCE_DATA = 'differenceData'
+KEY_EXPECTATIONS_DATA = 'expectationsData'
+KEY_EXTRA_COLUMN_VALUES = 'extraColumnValues'
+KEY_IMAGE_A_URL = 'imageAUrl'
+KEY_IMAGE_B_URL = 'imageBUrl'
+KEY_IS_DIFFERENT = 'isDifferent'
+
+
+class ImagePair(object):
+  """
+  A pair of images, together with optional metadata about them (pixel
+  difference metrics, whether to ignore mismatches, etc.)
+  """
+
+  def __init__(self, image_diff_db,
+               base_url, imageA_relative_url, imageB_relative_url,
+               expectations=None, extra_columns=None):
+    """
+    Args:
+      image_diff_db: ImageDiffDB instance used to generate/store image diffs
+      base_url: base of all image URLs
+      imageA_relative_url: URL of the first image, relative to base_url
+      imageB_relative_url: URL of the second image, relative to base_url
+      expectations: optional dictionary containing expectations-specific
+          metadata (ignore-failure, bug numbers, etc.)
+      extra_columns: optional dictionary containing more metadata (test name,
+          builder name, etc.)
+    """
+    self.base_url = base_url
+    self.imageA_relative_url = imageA_relative_url
+    self.imageB_relative_url = imageB_relative_url
+    self.expectations_dict = expectations
+    self.extra_columns_dict = extra_columns
+    # Identical relative URLs name the same image: there is nothing to diff.
+    self.diff_record = None
+    if imageA_relative_url != imageB_relative_url:
+      # TODO(epoger): Rather than blocking until image_diff_db can read in
+      # the image pair and generate diffs, it would be better to do it
+      # asynchronously: tell image_diff_db to download a bunch of file pairs,
+      # and only block later if we're still waiting for diff_records to come
+      # back.
+      locators = {'expected_image_locator': imageA_relative_url,
+                  'actual_image_locator': imageB_relative_url}
+      image_diff_db.add_image_pair(
+          expected_image_url=posixpath.join(base_url, imageA_relative_url),
+          actual_image_url=posixpath.join(base_url, imageB_relative_url),
+          **locators)
+      self.diff_record = image_diff_db.get_diff_record(**locators)
+
+  def as_dict(self):
+    """
+    Return a dictionary describing this ImagePair, as needed when constructing
+    the JSON representation.  Uses the KEY_* constants as keys.
+    """
+    asdict = {
+        KEY_IMAGE_A_URL: self.imageA_relative_url,
+        KEY_IMAGE_B_URL: self.imageB_relative_url,
+    }
+    # Optional metadata is only included when it is non-empty.
+    for key, metadata in ((KEY_EXPECTATIONS_DATA, self.expectations_dict),
+                          (KEY_EXTRA_COLUMN_VALUES, self.extra_columns_dict)):
+      if metadata:
+        asdict[key] = metadata
+    record = self.diff_record
+    is_different = bool(record and (record.get_num_pixels_differing() > 0))
+    asdict[KEY_IS_DIFFERENT] = is_different
+    if is_different:
+      asdict[KEY_DIFFERENCE_DATA] = record.as_dict()
+    return asdict
diff --git a/gm/rebaseline_server/imagepair_test.py b/gm/rebaseline_server/imagepair_test.py
new file mode 100755
index 0000000..fc1f275
--- /dev/null
+++ b/gm/rebaseline_server/imagepair_test.py
@@ -0,0 +1,153 @@
+#!/usr/bin/python
+
+"""
+Copyright 2014 Google Inc.
+
+Use of this source code is governed by a BSD-style license that can be
+found in the LICENSE file.
+
+Test imagepair.py
+"""
+
+# System-level imports
+import shutil
+import tempfile
+import unittest
+
+# Local imports
+import imagediffdb
+import imagepair
+
+
+IMG_URL_BASE = 'http://chromium-skia-gm.commondatastorage.googleapis.com/gm/bitmap-64bitMD5/'
+
+
+class ImagePairTest(unittest.TestCase):
+  """Tests for imagepair.ImagePair."""
+
+  def setUp(self):
+    self._temp_dir = tempfile.mkdtemp()  # passed to ImageDiffDB by the tests
+    self.maxDiff = None  # never truncate the diff shown by assertEqual
+
+  def tearDown(self):
+    shutil.rmtree(self._temp_dir)
+
+  def shortDescription(self):
+    """Tell unittest framework to not print docstrings for test cases."""
+    return None
+
+  def test_endToEnd(self):
+    """Test ImagePair, using a real ImageDiffDB to download real images.
+
+    TODO(epoger): Either in addition to or instead of this end-to-end test,
+    we should perform some tests using either:
+    1. a mock ImageDiffDB, or
+    2. a real ImageDiffDB that doesn't hit Google Storage looking for input
+       image files (maybe a file:// IMG_URL_BASE)
+    """
+    # Each self-test is a list whose elements are, by index:
+    # inputs:
+    #  0. imageA_relative_url (relative to IMG_URL_BASE)
+    #  1. imageB_relative_url (relative to IMG_URL_BASE)
+    #  2. expectations dict, or None
+    #  3. extra_columns dict, or None
+    # expected output:
+    #  4. expected result of ImagePair.as_dict()
+    selftests = [
+        [
+            # inputs: (the same image twice, with extra_columns only)
+            'arcofzorro/16206093933823793653.png',
+            'arcofzorro/16206093933823793653.png',
+            None,
+            {
+                'builder': 'MyBuilder',
+                'test': 'MyTest',
+            },
+            # expected output:
+            {
+                'extraColumnValues': {
+                    'builder': 'MyBuilder',
+                    'test': 'MyTest',
+                },
+                'imageAUrl': 'arcofzorro/16206093933823793653.png',
+                'imageBUrl': 'arcofzorro/16206093933823793653.png',
+                'isDifferent': False,
+            },
+        ],
+
+        [
+            # inputs: (two different images, with no metadata at all)
+            'arcofzorro/16206093933823793653.png',
+            'arcofzorro/13786535001616823825.png',
+            None,
+            None,
+            # expected output:
+            {
+                'differenceData': {
+                    'maxDiffPerChannel': [255, 255, 247],
+                    'numDifferingPixels': 662,
+                    'percentDifferingPixels': 0.0662,
+                    'weightedDiffMeasure': 0.01127756555171088,
+                },
+                'imageAUrl': 'arcofzorro/16206093933823793653.png',
+                'imageBUrl': 'arcofzorro/13786535001616823825.png',
+                'isDifferent': True,
+            },
+        ],
+
+        [
+            # inputs: (two different images, with both kinds of metadata)
+            'gradients_degenerate_2pt/10552995703607727960.png',
+            'gradients_degenerate_2pt/11198253335583713230.png',
+            {
+                'ignoreFailure': True,
+                'bugs': [1001, 1002],
+            },
+            {
+                'builder': 'MyBuilder',
+                'test': 'MyTest',
+            },
+            # expected output:
+            {
+                'differenceData': {
+                    'maxDiffPerChannel': [255, 0, 255],
+                    'numDifferingPixels': 102400,
+                    'percentDifferingPixels': 100.00,
+                    'weightedDiffMeasure': 66.66666666666667,
+                },
+                'expectationsData': {
+                    'bugs': [1001, 1002],
+                    'ignoreFailure': True,
+                },
+                'extraColumnValues': {
+                    'builder': 'MyBuilder',
+                    'test': 'MyTest',
+                },
+                'imageAUrl':
+                    'gradients_degenerate_2pt/10552995703607727960.png',
+                'imageBUrl':
+                    'gradients_degenerate_2pt/11198253335583713230.png',
+                'isDifferent': True,
+            },
+        ],
+    ]
+
+    db = imagediffdb.ImageDiffDB(self._temp_dir)
+    for selftest in selftests:
+      image_pair = imagepair.ImagePair(
+          image_diff_db=db,
+          base_url=IMG_URL_BASE,
+          imageA_relative_url=selftest[0],
+          imageB_relative_url=selftest[1],
+          expectations=selftest[2],
+          extra_columns=selftest[3])
+      self.assertEqual(image_pair.as_dict(), selftest[4])
+
+
+def main():  # Runs every ImagePairTest case, reporting at verbosity level 2.
+  loader, runner = unittest.TestLoader(), unittest.TextTestRunner(verbosity=2)
+  runner.run(loader.loadTestsFromTestCase(ImagePairTest))
+
+
+if __name__ == '__main__':
+  main()