blob: f6071f9700661abb3d405881e5eba751a2ede7d0 [file] [log] [blame]
epoger@google.com9dddf6f2013-11-08 16:25:25 +00001#!/usr/bin/python
2
3"""
4Copyright 2013 Google Inc.
5
6Use of this source code is governed by a BSD-style license that can be
7found in the LICENSE file.
8
Calculate differences between image pairs, and store them in a database.
10"""
11
12import contextlib
epoger54f1ad82014-07-02 07:43:04 -070013import json
epoger@google.com9dddf6f2013-11-08 16:25:25 +000014import logging
15import os
commit-bot@chromium.org9985ef52014-02-10 18:19:30 +000016import re
epoger@google.com9dddf6f2013-11-08 16:25:25 +000017import shutil
commit-bot@chromium.org44546f82014-02-11 18:21:26 +000018import sys
19import tempfile
epoger@google.com9dddf6f2013-11-08 16:25:25 +000020import urllib
epoger@google.com9dddf6f2013-11-08 16:25:25 +000021
# Set the PYTHONPATH to include the tools directory.
# The directory added is 'tools', two levels above the directory containing
# this file. The import of find_run_binary below must stay AFTER this call;
# presumably find_run_binary lives in that tools directory (verify).
sys.path.append(
    os.path.join(
        os.path.dirname(os.path.realpath(__file__)), os.pardir, os.pardir,
        'tools'))
import find_run_binary

# Location of the 'skpdiff' program, as reported by find_run_binary. This is
# resolved once, at module import time.
SKPDIFF_BINARY = find_run_binary.find_path_to_program('skpdiff')
commit-bot@chromium.org44546f82014-02-11 18:21:26 +000030
# Defaults for where (and with which suffix) downloaded images are stored
# beneath a storage root.
DEFAULT_IMAGE_SUFFIX = '.png'
DEFAULT_IMAGES_SUBDIR = 'images'

# Matches any single character that is neither a word character nor a hyphen.
# Written as a raw string so the backslash escapes reach the regex engine
# verbatim instead of relying on Python passing unknown escapes through.
DISALLOWED_FILEPATH_CHAR_REGEX = re.compile(r'[^\w\-]')

# Subdirectories (beneath a storage root) that hold the generated diff images.
RGBDIFFS_SUBDIR = 'diffs'
WHITEDIFFS_SUBDIR = 'whitediffs'

# Keys used within DiffRecord dictionary representations.
# NOTE: Keep these in sync with static/constants.js
KEY__DIFFERENCES__MAX_DIFF_PER_CHANNEL = 'maxDiffPerChannel'
KEY__DIFFERENCES__NUM_DIFF_PIXELS = 'numDifferingPixels'
KEY__DIFFERENCES__PERCENT_DIFF_PIXELS = 'percentDifferingPixels'
KEY__DIFFERENCES__PERCEPTUAL_DIFF = 'perceptualDifference'
commit-bot@chromium.org16f41802014-02-26 19:05:20 +000045
epoger@google.com9dddf6f2013-11-08 16:25:25 +000046
class DiffRecord(object):
    """Record of differences between two images.

    Constructing a DiffRecord downloads both images (unless they are already
    on local disk), runs the skpdiff binary on the pair, keeps the summary
    statistics it reports, and copies the generated diff images into
    storage_root.
    """

    def __init__(self, storage_root,
                 expected_image_url, expected_image_locator,
                 actual_image_url, actual_image_locator,
                 expected_images_subdir=DEFAULT_IMAGES_SUBDIR,
                 actual_images_subdir=DEFAULT_IMAGES_SUBDIR,
                 image_suffix=DEFAULT_IMAGE_SUFFIX):
        """Download this pair of images (unless we already have them on local
        disk), and prepare a DiffRecord for them.

        TODO(epoger): Make this asynchronously download images, rather than
        blocking until the images have been downloaded and processed.

        Args:
          storage_root: root directory on local disk within which we store
              all images
          expected_image_url: file or HTTP url from which we will download
              the expected image
          expected_image_locator: a unique ID string under which we will
              store the expected image within storage_root (probably
              including a checksum to guarantee uniqueness)
          actual_image_url: file or HTTP url from which we will download the
              actual image
          actual_image_locator: a unique ID string under which we will store
              the actual image within storage_root (probably including a
              checksum to guarantee uniqueness)
          expected_images_subdir: the subdirectory expected images are
              stored in.
          actual_images_subdir: the subdirectory actual images are stored in.
          image_suffix: the suffix of images.

        Raises:
          ValueError: if the skpdiff summary lacks the 'different_pixels' or
              'perceptual' results we asked it to compute.
          Exception: anything raised while downloading the images, running
              skpdiff, or reading its output is propagated to the caller
              (download failures are logged first).
        """
        expected_image_locator = _sanitize_locator(expected_image_locator)
        actual_image_locator = _sanitize_locator(actual_image_locator)

        # Download the expected/actual images, if we don't have them already.
        # TODO(rmistry): Add a parameter that just tries to use
        # already-present image files rather than downloading them.
        expected_image_file = os.path.join(
            storage_root, expected_images_subdir,
            str(expected_image_locator) + image_suffix)
        actual_image_file = os.path.join(
            storage_root, actual_images_subdir,
            str(actual_image_locator) + image_suffix)
        downloads = (
            ('expected_image_url', expected_image_url, expected_image_file),
            ('actual_image_url', actual_image_url, actual_image_file),
        )
        for description, url, filepath in downloads:
            try:
                _download_file(filepath, url)
            except Exception:
                # Log which of the two downloads failed, then let the caller
                # decide what to do about it.
                logging.exception('unable to download %s %s to file %s',
                                  description, url, filepath)
                raise

        # Get all diff images and values from skpdiff binary.
        skpdiff_output_dir = tempfile.mkdtemp()
        try:
            skpdiff_summary_file = os.path.join(skpdiff_output_dir,
                                                'skpdiff-output.json')
            skpdiff_rgbdiff_dir = os.path.join(skpdiff_output_dir, 'rgbDiff')
            skpdiff_whitediff_dir = os.path.join(skpdiff_output_dir,
                                                 'whiteDiff')

            # TODO: Call skpdiff ONCE for all image pairs, instead of calling
            # it repeatedly. This will allow us to parallelize a lot more
            # work.
            find_run_binary.run_command(
                [SKPDIFF_BINARY, '-p', expected_image_file, actual_image_file,
                 '--jsonp', 'false',
                 '--output', skpdiff_summary_file,
                 '--differs', 'perceptual', 'different_pixels',
                 '--rgbDiffDir', skpdiff_rgbdiff_dir,
                 '--whiteDiffDir', skpdiff_whitediff_dir,
                ])

            # Get information out of the skpdiff_summary_file.
            with open(skpdiff_summary_file) as fp:
                data = json.load(fp)

            # For now, we can assume there is only one record in the output
            # summary, since we passed skpdiff only one pair of images.
            record = data['records'][0]
            self._width = record['width']
            self._height = record['height']
            # TODO: make max_diff_per_channel a tuple instead of a list,
            # because the structure is meaningful (first element is red,
            # second is green, etc.)
            # See http://stackoverflow.com/a/626871
            self._max_diff_per_channel = [
                record['maxRedDiff'], record['maxGreenDiff'],
                record['maxBlueDiff']]
            rgb_diff_path = record['rgbDiffPath']
            white_diff_path = record['whiteDiffPath']

            # Pick out the results of the two differs we requested above.
            num_pixels_differing = None
            perceptual_similarity = None
            for stats in record['diffs']:
                differ_name = stats['differName']
                if differ_name == 'different_pixels':
                    num_pixels_differing = stats['pointsOfInterest']
                elif differ_name == 'perceptual':
                    perceptual_similarity = stats['result']
            # Fail here, with a clear message, rather than with an unbound
            # local below or a missing attribute when a getter is called.
            if num_pixels_differing is None or perceptual_similarity is None:
                raise ValueError(
                    'skpdiff summary is missing the different_pixels and/or '
                    'perceptual results')
            self._num_pixels_differing = num_pixels_differing

            # skpdiff returns the perceptual similarity; convert it to get
            # the perceptual difference percentage.
            # skpdiff outputs -1 if the images are different sizes. Treat any
            # output that does not lie in [0, 1] as having 0% perceptual
            # similarity.
            if not 0 <= perceptual_similarity <= 1:
                perceptual_similarity = 0
            self._perceptual_difference = 100 - (perceptual_similarity * 100)

            # Store the rgbdiff and whitediff images generated above.
            diff_image_locator = _get_difference_locator(
                expected_image_locator=expected_image_locator,
                actual_image_locator=actual_image_locator)
            basename = str(diff_image_locator) + image_suffix
            _mkdir_unless_exists(os.path.join(storage_root, RGBDIFFS_SUBDIR))
            _mkdir_unless_exists(
                os.path.join(storage_root, WHITEDIFFS_SUBDIR))
            # TODO: Modify skpdiff's behavior so we can tell it exactly where
            # to write the image files into, rather than having to move them
            # around after skpdiff writes them out.
            shutil.copyfile(
                rgb_diff_path,
                os.path.join(storage_root, RGBDIFFS_SUBDIR, basename))
            shutil.copyfile(
                white_diff_path,
                os.path.join(storage_root, WHITEDIFFS_SUBDIR, basename))

        finally:
            # The scratch directory is removed whether or not skpdiff
            # succeeded.
            shutil.rmtree(skpdiff_output_dir)

    # TODO(epoger): Use properties instead of getters throughout.
    # See http://stackoverflow.com/a/6618176
    def get_num_pixels_differing(self):
        """Returns the absolute number of pixels that differ."""
        return self._num_pixels_differing

    def get_percent_pixels_differing(self):
        """Returns the percentage of pixels that differ, as a float between
        0 and 100 (inclusive)."""
        return ((float(self._num_pixels_differing) * 100) /
                (self._width * self._height))

    def get_perceptual_difference(self):
        """Returns the perceptual difference percentage."""
        return self._perceptual_difference

    def get_max_diff_per_channel(self):
        """Returns the maximum difference between the expected and actual
        images for each R/G/B channel, as a list."""
        return self._max_diff_per_channel

    def as_dict(self):
        """Returns a dictionary representation of this DiffRecord, as needed
        when constructing the JSON representation."""
        return {
            KEY__DIFFERENCES__NUM_DIFF_PIXELS: self._num_pixels_differing,
            KEY__DIFFERENCES__PERCENT_DIFF_PIXELS:
                self.get_percent_pixels_differing(),
            KEY__DIFFERENCES__MAX_DIFF_PER_CHANNEL:
                self._max_diff_per_channel,
            KEY__DIFFERENCES__PERCEPTUAL_DIFF: self._perceptual_difference,
        }
209
epoger@google.com9dddf6f2013-11-08 16:25:25 +0000210
class ImageDiffDB(object):
    """Calculates differences between image pairs, maintaining a database of
    them for download."""

    def __init__(self, storage_root):
        """
        Args:
          storage_root: string; root path within which the DB will store all
              of its stuff
        """
        self._storage_root = storage_root

        # Dictionary of DiffRecords, keyed by (expected_image_locator,
        # actual_image_locator) tuples. A value of None means we tried, and
        # failed, to create a DiffRecord for that pair.
        self._diff_dict = {}

    @property
    def storage_root(self):
        """The storage_root this ImageDiffDB was constructed with."""
        return self._storage_root

    def add_image_pair(self,
                       expected_image_url, expected_image_locator,
                       actual_image_url, actual_image_locator):
        """Download this pair of images (unless we already have them on local
        disk), and prepare a DiffRecord for them.

        If a record (or a recorded failure) already exists for this pair of
        locators, this is a no-op.

        TODO(epoger): Make this asynchronously download images, rather than
        blocking until the images have been downloaded and processed.
        When we do that, we should probably add a new method that will block
        until all of the images have been downloaded and processed.
        Otherwise, we won't know when it's safe to start calling
        get_diff_record().
        jcgregorio notes: maybe just make ImageDiffDB thread-safe and create a
        thread-pool/worker queue at a higher level that just uses ImageDiffDB?

        Args:
          expected_image_url: file or HTTP url from which we will download
              the expected image
          expected_image_locator: a unique ID string under which we will
              store the expected image within storage_root (probably
              including a checksum to guarantee uniqueness)
          actual_image_url: file or HTTP url from which we will download the
              actual image
          actual_image_locator: a unique ID string under which we will store
              the actual image within storage_root (probably including a
              checksum to guarantee uniqueness)
        """
        expected_image_locator = _sanitize_locator(expected_image_locator)
        actual_image_locator = _sanitize_locator(actual_image_locator)
        key = (expected_image_locator, actual_image_locator)
        if key in self._diff_dict:
            return

        try:
            new_diff_record = DiffRecord(
                self._storage_root,
                expected_image_url=expected_image_url,
                expected_image_locator=expected_image_locator,
                actual_image_url=actual_image_url,
                actual_image_locator=actual_image_locator)
        except Exception:
            # If we can't create a real DiffRecord for this (expected, actual)
            # pair, store None and the UI will show whatever information we
            # DO have.
            # Fixes http://skbug.com/2368 .
            logging.exception(
                'got exception while creating a DiffRecord for '
                'expected_image_url=%s , actual_image_url=%s; returning None',
                expected_image_url, actual_image_url)
            new_diff_record = None
        self._diff_dict[key] = new_diff_record

    def get_diff_record(self, expected_image_locator, actual_image_locator):
        """Returns the DiffRecord for this image pair.

        The returned value is None if add_image_pair() was unable to create a
        DiffRecord for the pair.

        Raises a KeyError if we don't have a DiffRecord for this image pair.
        """
        key = (_sanitize_locator(expected_image_locator),
               _sanitize_locator(actual_image_locator))
        return self._diff_dict[key]
epoger@google.com9dddf6f2013-11-08 16:25:25 +0000286
287
288# Utility functions
289
epoger54f1ad82014-07-02 07:43:04 -0700290def _download_file(local_filepath, url):
291 """Download a file from url to local_filepath, unless it is already there.
epoger@google.com9dddf6f2013-11-08 16:25:25 +0000292
293 Args:
294 local_filepath: path on local disk where the image should be stored
295 url: URL from which we can download the image if we don't have it yet
epoger@google.com9dddf6f2013-11-08 16:25:25 +0000296 """
297 if not os.path.exists(local_filepath):
298 _mkdir_unless_exists(os.path.dirname(local_filepath))
299 with contextlib.closing(urllib.urlopen(url)) as url_handle:
commit-bot@chromium.orgc9b511f2014-04-15 18:50:12 +0000300 with open(local_filepath, 'wb') as file_handle:
epoger@google.com9dddf6f2013-11-08 16:25:25 +0000301 shutil.copyfileobj(fsrc=url_handle, fdst=file_handle)
epoger@google.com214a0242013-11-22 19:26:18 +0000302
commit-bot@chromium.org16f41802014-02-26 19:05:20 +0000303
epoger@google.com9dddf6f2013-11-08 16:25:25 +0000304def _mkdir_unless_exists(path):
305 """Unless path refers to an already-existing directory, create it.
306
307 Args:
308 path: path on local disk
309 """
commit-bot@chromium.orgc9b511f2014-04-15 18:50:12 +0000310 if not os.path.isdir(path):
epoger@google.com9dddf6f2013-11-08 16:25:25 +0000311 os.makedirs(path)
312
commit-bot@chromium.org16f41802014-02-26 19:05:20 +0000313
def _sanitize_locator(locator):
    """Returns a filename-safe version of a locator.

    Every character matched by DISALLOWED_FILEPATH_CHAR_REGEX is replaced
    with an underscore, so that none of the remaining characters will have
    special meaning in filenames.

    Args:
      locator: string, or something that can be represented as a string
    """
    locator_text = str(locator)
    return DISALLOWED_FILEPATH_CHAR_REGEX.sub('_', locator_text)
322
commit-bot@chromium.org16f41802014-02-26 19:05:20 +0000323
def _get_difference_locator(expected_image_locator, actual_image_locator):
    """Returns the locator string used to look up the diffs between
    expected_image and actual_image.

    We must keep this function in sync with getImageDiffRelativeUrl() in
    static/loader.js

    Args:
      expected_image_locator: locator string pointing at expected image
      actual_image_locator: locator string pointing at actual image

    Returns: already-sanitized locator where the diffs between expected and
        actual images can be found
    """
    sanitized_expected = _sanitize_locator(expected_image_locator)
    sanitized_actual = _sanitize_locator(actual_image_locator)
    return "%s-vs-%s" % (sanitized_expected, sanitized_actual)