rebaseline_server: add ImagePair class, a step towards new intermediate JSON schema

See https://goto.google.com/ChangingRbsJson and bug 1919 for additional context

BUG=skia:1919
NOTRY=True
R=rmistry@google.com

Author: epoger@google.com

Review URL: https://codereview.chromium.org/157593006

git-svn-id: http://skia.googlecode.com/svn/trunk@13385 2bbb7eff-a529-9590-31e7-b0007b416f81
diff --git a/gm/rebaseline_server/imagediffdb.py b/gm/rebaseline_server/imagediffdb.py
index 3a2ce63..936301e 100644
--- a/gm/rebaseline_server/imagediffdb.py
+++ b/gm/rebaseline_server/imagediffdb.py
@@ -12,6 +12,7 @@
 import contextlib
 import logging
 import os
+import re
 import shutil
 import urllib
 try:
@@ -23,6 +24,8 @@
 DEFAULT_IMAGE_SUFFIX = '.png'
 DEFAULT_IMAGES_SUBDIR = 'images'
 
+DISALLOWED_FILEPATH_CHAR_REGEX = re.compile(r'[^\w\-]')  # neither \w nor '-'
+
 DIFFS_SUBDIR = 'diffs'
 WHITEDIFFS_SUBDIR = 'whitediffs'
 
@@ -61,6 +64,9 @@
       actual_images_subdir: the subdirectory actual images are stored in.
       image_suffix: the suffix of images.
     """
+    expected_image_locator = _sanitize_locator(expected_image_locator)
+    actual_image_locator = _sanitize_locator(actual_image_locator)
+
     # Download the expected/actual images, if we don't have them already.
     # TODO(rmistry): Add a parameter that makes _download_and_open_image raise
     # an exception if images are not found locally (instead of trying to
@@ -132,6 +138,16 @@
     for each R/G/B channel, as a list."""
     return self._max_diff_per_channel
 
+  def as_dict(self):
+    """Returns the diff metrics of this DiffRecord as a dictionary, in the
+    form needed when constructing the JSON representation."""
+    metrics = {}
+    metrics['numDifferingPixels'] = self._num_pixels_differing
+    metrics['percentDifferingPixels'] = self.get_percent_pixels_differing()
+    metrics['weightedDiffMeasure'] = self.get_weighted_diff_measure()
+    metrics['maxDiffPerChannel'] = self._max_diff_per_channel
+    return metrics
+
 
 class ImageDiffDB(object):
   """ Calculates differences between image pairs, maintaining a database of
@@ -174,6 +190,8 @@
           actual image within storage_root (probably including a checksum to
           guarantee uniqueness)
     """
+    expected_image_locator = _sanitize_locator(expected_image_locator)
+    actual_image_locator = _sanitize_locator(actual_image_locator)
     key = (expected_image_locator, actual_image_locator)
     if not key in self._diff_dict:
       try:
@@ -193,7 +211,8 @@
 
     Raises a KeyError if we don't have a DiffRecord for this image pair.
     """
-    key = (expected_image_locator, actual_image_locator)
+    key = (_sanitize_locator(expected_image_locator),
+           _sanitize_locator(actual_image_locator))
     return self._diff_dict[key]
 
 
@@ -322,6 +341,15 @@
   if not os.path.isdir(path):
     os.makedirs(path)
 
+def _sanitize_locator(locator):
+  """Returns a filename-safe version of a locator: every character matched by
+  DISALLOWED_FILEPATH_CHAR_REGEX is replaced with an underscore.
+
+  Args:
+    locator: string, or something that can be represented as a string
+  """
+  return re.sub(DISALLOWED_FILEPATH_CHAR_REGEX, '_', str(locator))
+
 def _get_difference_locator(expected_image_locator, actual_image_locator):
   """Returns the locator string used to look up the diffs between expected_image
   and actual_image.
@@ -330,7 +358,8 @@
     expected_image_locator: locator string pointing at expected image
     actual_image_locator: locator string pointing at actual image
 
-  Returns: locator where the diffs between expected and actual images can be
-      found
+  Returns: already-sanitized locator where the diffs between expected and
+      actual images can be found
   """
-  return "%s-vs-%s" % (expected_image_locator, actual_image_locator)
+  return "%s-vs-%s" % (_sanitize_locator(expected_image_locator),
+                       _sanitize_locator(actual_image_locator))