blob: 8c010f8a2bbf364e010153a393cd15c5b03267af [file] [log] [blame]
epoger@google.com9dddf6f2013-11-08 16:25:25 +00001#!/usr/bin/python
2
"""
Copyright 2013 Google Inc.

Use of this source code is governed by a BSD-style license that can be
found in the LICENSE file.

Calculate differences between image pairs, and store them in a database.
"""
11
12import contextlib
commit-bot@chromium.org44546f82014-02-11 18:21:26 +000013import csv
epoger@google.com9dddf6f2013-11-08 16:25:25 +000014import logging
15import os
commit-bot@chromium.org9985ef52014-02-10 18:19:30 +000016import re
epoger@google.com9dddf6f2013-11-08 16:25:25 +000017import shutil
commit-bot@chromium.org44546f82014-02-11 18:21:26 +000018import sys
19import tempfile
epoger@google.com9dddf6f2013-11-08 16:25:25 +000020import urllib
21try:
22 from PIL import Image, ImageChops
23except ImportError:
24 raise ImportError('Requires PIL to be installed; see '
25 + 'http://www.pythonware.com/products/pil/')
26
commit-bot@chromium.org44546f82014-02-11 18:21:26 +000027# Set the PYTHONPATH to include the tools directory.
28sys.path.append(
29 os.path.join(
30 os.path.dirname(os.path.realpath(__file__)), os.pardir, os.pardir,
31 'tools'))
32import find_run_binary
33
# Name of the skpdiff program; DiffRecord resolves it to a full path with
# find_run_binary.find_path_to_program().
SKPDIFF_BINARY_NAME = 'skpdiff'

# Default filename suffix and storage_root subdirectory for downloaded images.
DEFAULT_IMAGE_SUFFIX = '.png'
DEFAULT_IMAGES_SUBDIR = 'images'

# Matches any single character that is neither a word character nor a hyphen;
# _sanitize_locator() replaces every match with '_'.
# Written as a raw string so the backslashes reach the regex engine as-is
# rather than being interpreted as (invalid) string escape sequences.
DISALLOWED_FILEPATH_CHAR_REGEX = re.compile(r'[^\w\-]')

# Subdirectories of storage_root that receive the generated diff images.
DIFFS_SUBDIR = 'diffs'
WHITEDIFFS_SUBDIR = 'whitediffs'

# Number of histogram buckets per band (a.k.a. color channel) in the PIL
# histograms this module processes.
VALUES_PER_BAND = 256
45
epoger@google.com9dddf6f2013-11-08 16:25:25 +000046
class DiffRecord(object):
    """Record of differences between two images."""

    def __init__(self, storage_root,
                 expected_image_url, expected_image_locator,
                 actual_image_url, actual_image_locator,
                 expected_images_subdir=DEFAULT_IMAGES_SUBDIR,
                 actual_images_subdir=DEFAULT_IMAGES_SUBDIR,
                 image_suffix=DEFAULT_IMAGE_SUFFIX):
        """Download this pair of images (unless we already have them on local
        disk), and prepare a DiffRecord for them.

        As a side effect, the diff and whitediff images are written beneath
        storage_root (in DIFFS_SUBDIR and WHITEDIFFS_SUBDIR respectively).

        TODO(epoger): Make this asynchronously download images, rather than
        blocking until the images have been downloaded and processed.

        Args:
          storage_root: root directory on local disk within which we store
              all images
          expected_image_url: file or HTTP url from which we will download
              the expected image
          expected_image_locator: a unique ID string under which we will store
              the expected image within storage_root (probably including a
              checksum to guarantee uniqueness)
          actual_image_url: file or HTTP url from which we will download the
              actual image
          actual_image_locator: a unique ID string under which we will store
              the actual image within storage_root (probably including a
              checksum to guarantee uniqueness)
          expected_images_subdir: the subdirectory expected images are stored
              in.
          actual_images_subdir: the subdirectory actual images are stored in.
          image_suffix: the suffix of images.
        """
        expected_image_locator = _sanitize_locator(expected_image_locator)
        actual_image_locator = _sanitize_locator(actual_image_locator)

        # Both the download step and skpdiff need these paths, so build them
        # exactly once.
        expected_image_path = os.path.join(
            storage_root, expected_images_subdir,
            str(expected_image_locator) + image_suffix)
        actual_image_path = os.path.join(
            storage_root, actual_images_subdir,
            str(actual_image_locator) + image_suffix)

        # Download the expected/actual images, if we don't have them already.
        # TODO(rmistry): Add a parameter that makes _download_and_open_image
        # raise an exception if images are not found locally (instead of
        # trying to download them).
        expected_image = _download_and_open_image(
            expected_image_path, expected_image_url)
        actual_image = _download_and_open_image(
            actual_image_path, actual_image_url)

        # Generate the diff image (absolute diff at each pixel) and
        # max_diff_per_channel.  The histogram is taken here, before the
        # alpha band is attached to diff_image further down.
        diff_image = _generate_image_diff(actual_image, expected_image)
        diff_histogram = diff_image.histogram()
        (self._width, self._height) = diff_image.size
        self._weighted_diff_measure = _calculate_weighted_diff_metric(
            diff_histogram, self._width * self._height)
        self._max_diff_per_channel = _max_per_band(diff_histogram)

        # Generate the whitediff image (any differing pixels show as white).
        # This is tricky, because when you convert color images to grayscale
        # or black & white in PIL, it has its own ideas about thresholds.
        # We have to force it: if a pixel has any color at all, it's a '1'.
        # The lambda maps 0 to False and every positive value to
        # VALUES_PER_BAND.
        bands = diff_image.split()
        graydiff_image = ImageChops.lighter(
            ImageChops.lighter(bands[0], bands[1]), bands[2])
        whitediff_image = (
            graydiff_image.point(lambda p: p > 0 and VALUES_PER_BAND)
            .convert('1', dither=Image.NONE))

        # Calculate the perceptual difference percentage.
        self._perceptual_difference = self._measure_perceptual_difference(
            expected_image_path, actual_image_path)

        # Final touches on diff_image: use whitediff_image as an alpha mask.
        # Unchanged pixels are transparent; differing pixels are opaque.
        diff_image.putalpha(whitediff_image)

        # Store the diff and whitediff images generated above.
        diff_image_locator = _get_difference_locator(
            expected_image_locator=expected_image_locator,
            actual_image_locator=actual_image_locator)
        basename = str(diff_image_locator) + image_suffix
        _save_image(diff_image, os.path.join(
            storage_root, DIFFS_SUBDIR, basename))
        _save_image(whitediff_image, os.path.join(
            storage_root, WHITEDIFFS_SUBDIR, basename))

        # Count the differing pixels: the last histogram bucket of the
        # whitediff image.
        self._num_pixels_differing = (
            whitediff_image.histogram()[VALUES_PER_BAND - 1])

    @staticmethod
    def _measure_perceptual_difference(expected_image_path,
                                       actual_image_path):
        """Runs skpdiff on two image files and returns the perceptual
        difference percentage (100 minus the similarity percentage).

        skpdiff writes its results to a CSV file in a temporary directory,
        which is deleted again before returning.  If the CSV holds several
        rows, the last one determines the result.  If it holds no rows at
        all, the images are treated as having 0% perceptual similarity, the
        same fallback that is applied to out-of-range skpdiff output.

        Args:
          expected_image_path: path on local disk of the expected image
          actual_image_path: path on local disk of the actual image

        Returns: the perceptual difference, as a number between 0 and 100
            (inclusive).
        """
        perceptual_similarity = None
        skpdiff_csv_dir = tempfile.mkdtemp()
        try:
            skpdiff_csv_output = os.path.join(
                skpdiff_csv_dir, 'skpdiff-output.csv')
            skpdiff_binary = find_run_binary.find_path_to_program(
                SKPDIFF_BINARY_NAME)
            find_run_binary.run_command(
                [skpdiff_binary, '-p', expected_image_path, actual_image_path,
                 '--csv', skpdiff_csv_output, '-d', 'perceptual'])
            with open(skpdiff_csv_output) as csv_file:
                for row in csv.DictReader(csv_file):
                    # The column is looked up with its leading space, exactly
                    # as the key appears in the parsed CSV header.
                    perceptual_similarity = float(row[' perceptual'].strip())
        finally:
            shutil.rmtree(skpdiff_csv_dir)

        if perceptual_similarity is None:
            logging.warning(
                'skpdiff produced no output rows for [%s] vs [%s]; treating '
                'them as 0%% perceptually similar',
                expected_image_path, actual_image_path)
            perceptual_similarity = 0
        elif not 0 <= perceptual_similarity <= 1:
            # skpdiff outputs -1 if the images are different sizes. Treat any
            # output that does not lie in [0, 1] as having 0% perceptual
            # similarity.
            perceptual_similarity = 0

        # skpdiff returns the perceptual similarity, convert it to get the
        # perceptual difference percentage.
        return 100 - (perceptual_similarity * 100)

    def get_num_pixels_differing(self):
        """Returns the absolute number of pixels that differ."""
        return self._num_pixels_differing

    def get_percent_pixels_differing(self):
        """Returns the percentage of pixels that differ, as a float between
        0 and 100 (inclusive)."""
        return ((float(self._num_pixels_differing) * 100) /
                (self._width * self._height))

    def get_perceptual_difference(self):
        """Returns the perceptual difference percentage."""
        return self._perceptual_difference

    def get_weighted_diff_measure(self):
        """Returns a weighted measure of image diffs, as a float between 0
        and 100 (inclusive)."""
        return self._weighted_diff_measure

    def get_max_diff_per_channel(self):
        """Returns the maximum difference between the expected and actual
        images for each R/G/B channel, as a list."""
        return self._max_diff_per_channel

    def as_dict(self):
        """Returns a dictionary representation of this DiffRecord, as needed
        when constructing the JSON representation."""
        return {
            'numDifferingPixels': self._num_pixels_differing,
            'percentDifferingPixels': self.get_percent_pixels_differing(),
            'weightedDiffMeasure': self.get_weighted_diff_measure(),
            'maxDiffPerChannel': self._max_diff_per_channel,
        }
192
epoger@google.com9dddf6f2013-11-08 16:25:25 +0000193
class ImageDiffDB(object):
    """Calculates differences between image pairs, maintaining a database of
    them for download."""

    def __init__(self, storage_root):
        """
        Args:
          storage_root: string; root path within which the DB will store all
              of its stuff
        """
        # Dictionary of DiffRecords, keyed by (expected_image_locator,
        # actual_image_locator) tuples of sanitized locators.
        self._diff_dict = {}
        self._storage_root = storage_root

    def add_image_pair(self,
                       expected_image_url, expected_image_locator,
                       actual_image_url, actual_image_locator):
        """Download this pair of images (unless we already have them on local
        disk), and prepare a DiffRecord for them.

        If a DiffRecord for this pair is already stored, nothing happens.
        If building the DiffRecord raises, the exception is logged and no
        record is stored for the pair.

        TODO(epoger): Make this asynchronously download images, rather than
        blocking until the images have been downloaded and processed.
        When we do that, we should probably add a new method that will block
        until all of the images have been downloaded and processed.
        Otherwise, we won't know when it's safe to start calling
        get_diff_record().
        jcgregorio notes: maybe just make ImageDiffDB thread-safe and create a
        thread-pool/worker queue at a higher level that just uses ImageDiffDB?

        Args:
          expected_image_url: file or HTTP url from which we will download
              the expected image
          expected_image_locator: a unique ID string under which we will store
              the expected image within storage_root (probably including a
              checksum to guarantee uniqueness)
          actual_image_url: file or HTTP url from which we will download the
              actual image
          actual_image_locator: a unique ID string under which we will store
              the actual image within storage_root (probably including a
              checksum to guarantee uniqueness)
        """
        clean_expected = _sanitize_locator(expected_image_locator)
        clean_actual = _sanitize_locator(actual_image_locator)
        pair_key = (clean_expected, clean_actual)

        # Nothing to do for a pair we have already processed.
        if pair_key in self._diff_dict:
            return

        try:
            record = DiffRecord(
                self._storage_root,
                expected_image_url=expected_image_url,
                expected_image_locator=clean_expected,
                actual_image_url=actual_image_url,
                actual_image_locator=clean_actual)
        except Exception:
            logging.exception('got exception while creating new DiffRecord')
        else:
            self._diff_dict[pair_key] = record

    def get_diff_record(self, expected_image_locator, actual_image_locator):
        """Returns the DiffRecord for this image pair.

        Raises a KeyError if we don't have a DiffRecord for this image pair.
        """
        pair_key = (_sanitize_locator(expected_image_locator),
                    _sanitize_locator(actual_image_locator))
        return self._diff_dict[pair_key]
259
260
261# Utility functions
262
def _calculate_weighted_diff_metric(histogram, num_pixels):
    """Given the histogram of a diff image (per-channel diff at each
    pixel between two images), calculate the weighted diff metric (a
    stab at how different the two images really are).

    Args:
      histogram: PIL histogram of a per-channel diff between two images;
          its length must be a multiple of VALUES_PER_BAND
      num_pixels: integer; the total number of pixels in the diff image

    Returns: a weighted diff metric, as a float between 0 and 100
        (inclusive).  Returns 0.0 when there is nothing to compare (zero
        pixels or an empty histogram) instead of dividing by zero.
    """
    # TODO(epoger): As a wild guess at an appropriate metric, weight each
    # different pixel by the square of its delta value.  (The more different
    # a pixel is from its expectation, the more we care about it.)
    # In the long term, we will probably use some metric generated by
    # skpdiff anyway.
    assert len(histogram) % VALUES_PER_BAND == 0
    # Floor division keeps the band count an integer regardless of which
    # division semantics are in effect.
    num_bands = len(histogram) // VALUES_PER_BAND
    max_diff = num_pixels * num_bands * (VALUES_PER_BAND - 1) ** 2
    if max_diff == 0:
        return 0.0
    # index % VALUES_PER_BAND is the delta value that a histogram bucket
    # stands for within its band.
    total_diff = sum(
        count * (index % VALUES_PER_BAND) ** 2
        for index, count in enumerate(histogram))
    return float(100 * total_diff) / max_diff
286
def _max_per_band(histogram):
    """Given the histogram of an image, return the maximum value of each band
    (a.k.a. "color channel", such as R/G/B) across the entire image.

    Args:
      histogram: PIL histogram; its length must be a multiple of
          VALUES_PER_BAND

    Returns the maximum value of each band within the image histogram, as a
    list.  A band whose buckets are all zero contributes no entry.
    """
    assert len(histogram) % VALUES_PER_BAND == 0
    # Floor division keeps the band count an integer regardless of which
    # division semantics are in effect.
    num_bands = len(histogram) // VALUES_PER_BAND
    max_per_band = []
    for band in range(num_bands):
        # Assuming that VALUES_PER_BAND is 256...
        # the 'R' band makes up indices 0-255 in the histogram,
        # the 'G' band makes up indices 256-511 in the histogram,
        # etc.
        min_index = band * VALUES_PER_BAND
        band_counts = histogram[min_index:min_index + VALUES_PER_BAND]
        # Walk down from the highest value; the first non-empty bucket is
        # this band's maximum.
        for value in range(VALUES_PER_BAND - 1, -1, -1):
            if band_counts[value] > 0:
                max_per_band.append(value)
                break
    return max_per_band
epoger@google.com9dddf6f2013-11-08 16:25:25 +0000312
def _generate_image_diff(image1, image2):
    """Wrapper for ImageChops.difference(image1, image2) that will handle some
    errors automatically, or at least yield more useful error messages.

    Both images are converted to RGB before they are compared.

    TODO(epoger): Currently, some of the images generated by the bots are RGBA
    and others are RGB.  I'm not sure why that is.  For now, to avoid
    confusion within the UI, convert all to RGB when diffing.

    Args:
      image1: a PIL image object
      image2: a PIL image object

    Returns: per-pixel diffs between image1 and image2, as a PIL image object

    Raises: ValueError (after logging which images were involved) if the
        conversion or the diff raises it.
    """
    try:
        rgb_image1 = image1.convert('RGB')
        rgb_image2 = image2.convert('RGB')
        return ImageChops.difference(rgb_image1, rgb_image2)
    except ValueError:
        message = 'Error diffing image1 [%s] and image2 [%s].' % (
            repr(image1), repr(image2))
        logging.error(message)
        raise
333
def _download_and_open_image(local_filepath, url):
    """Open the image at local_filepath; if there is no file at that path,
    download it from url to that path and then open it.

    If the download fails partway, the incomplete file is deleted before the
    exception propagates.  Otherwise the truncated file would be mistaken for
    a valid cached image on every later call.

    Args:
      local_filepath: path on local disk where the image should be stored
      url: URL from which we can download the image if we don't have it yet

    Returns: a PIL image object
    """
    if not os.path.exists(local_filepath):
        _mkdir_unless_exists(os.path.dirname(local_filepath))
        download_complete = False
        try:
            with contextlib.closing(urllib.urlopen(url)) as url_handle:
                with open(local_filepath, 'wb') as file_handle:
                    shutil.copyfileobj(fsrc=url_handle, fdst=file_handle)
            download_complete = True
        finally:
            # The file did not exist before this call, so anything found at
            # this path after a failure is our own partial download.
            if not download_complete and os.path.exists(local_filepath):
                os.remove(local_filepath)
    return _open_image(local_filepath)
350
def _open_image(filepath):
    """Wrapper for Image.open(filepath) that yields more useful error
    messages.

    Args:
      filepath: path on local disk to load image from

    Returns: a PIL image object

    Raises: IOError (after logging the offending path) if Image.open raises
        it.
    """
    try:
        image = Image.open(filepath)
    except IOError:
        message = 'IOError loading image file %s' % filepath
        logging.error(message)
        raise
    return image
364
def _save_image(image, filepath, format='PNG'):
    """Write an image to disk, creating any intermediate directories as
    needed.

    Args:
      image: a PIL image object
      filepath: path on local disk to write image to
      format: one of the PIL image formats, listed at
          http://effbot.org/imagingbook/formats.htm
    """
    # Make sure the destination directory exists before PIL writes to it.
    parent_dir = os.path.dirname(filepath)
    _mkdir_unless_exists(parent_dir)
    image.save(filepath, format)
376
epoger@google.com9dddf6f2013-11-08 16:25:25 +0000377def _mkdir_unless_exists(path):
378 """Unless path refers to an already-existing directory, create it.
379
380 Args:
381 path: path on local disk
382 """
383 if not os.path.isdir(path):
384 os.makedirs(path)
385
def _sanitize_locator(locator):
    """Returns a sanitized version of a locator (one in which we know none of
    the characters will have special meaning in filenames).

    Every character matched by DISALLOWED_FILEPATH_CHAR_REGEX is replaced
    with an underscore.

    Args:
      locator: string, or something that can be represented as a string
    """
    locator_text = str(locator)
    return DISALLOWED_FILEPATH_CHAR_REGEX.sub('_', locator_text)
394
def _get_difference_locator(expected_image_locator, actual_image_locator):
    """Returns the locator string used to look up the diffs between
    expected_image and actual_image.

    Args:
      expected_image_locator: locator string pointing at expected image
      actual_image_locator: locator string pointing at actual image

    Returns: already-sanitized locator where the diffs between expected and
        actual images can be found
    """
    clean_expected = _sanitize_locator(expected_image_locator)
    clean_actual = _sanitize_locator(actual_image_locator)
    return "%s-vs-%s" % (clean_expected, clean_actual)