blob: 8cec46bc9434fcb74ee7d1b1d2619e0411704618 [file] [log] [blame]
epoger@google.com9dddf6f2013-11-08 16:25:25 +00001#!/usr/bin/python
2
3"""
4Copyright 2013 Google Inc.
5
6Use of this source code is governed by a BSD-style license that can be
7found in the LICENSE file.
8
Calculate differences between image pairs, and store them in a database.
10"""
11
12import contextlib
commit-bot@chromium.org44546f82014-02-11 18:21:26 +000013import csv
epoger@google.com9dddf6f2013-11-08 16:25:25 +000014import logging
15import os
commit-bot@chromium.org9985ef52014-02-10 18:19:30 +000016import re
epoger@google.com9dddf6f2013-11-08 16:25:25 +000017import shutil
commit-bot@chromium.org44546f82014-02-11 18:21:26 +000018import sys
19import tempfile
epoger@google.com9dddf6f2013-11-08 16:25:25 +000020import urllib
21try:
22 from PIL import Image, ImageChops
23except ImportError:
24 raise ImportError('Requires PIL to be installed; see '
25 + 'http://www.pythonware.com/products/pil/')
26
# Make the 'tools' directory (two levels above the directory holding this
# file) importable, so that find_run_binary can be imported below.
_TOOLS_DIR = os.path.join(
    os.path.dirname(os.path.realpath(__file__)), os.pardir, os.pardir, 'tools')
sys.path.append(_TOOLS_DIR)
import find_run_binary

# Path to the 'skpdiff' program, as reported by find_run_binary; looked up
# once, when this module is loaded.
SKPDIFF_BINARY = find_run_binary.find_path_to_program('skpdiff')
commit-bot@chromium.org44546f82014-02-11 18:21:26 +000035
# Default filename suffix and storage subdirectory for downloaded images.
DEFAULT_IMAGE_SUFFIX = '.png'
DEFAULT_IMAGES_SUBDIR = 'images'

# Matches every character that is neither a word character nor a hyphen.
# Written as a raw string so that the regex escapes are not interpreted as
# (invalid) string escapes; the compiled pattern is unchanged.
DISALLOWED_FILEPATH_CHAR_REGEX = re.compile(r'[^\w\-]')

# Subdirectories (within the storage root) that hold the generated images.
DIFFS_SUBDIR = 'diffs'
WHITEDIFFS_SUBDIR = 'whitediffs'

# Number of histogram entries PIL reports for each band of an image.
VALUES_PER_BAND = 256
45
epoger@google.com9dddf6f2013-11-08 16:25:25 +000046
class DiffRecord(object):
    """Record of differences between two images.

    Constructing a DiffRecord downloads both images (if they are not already
    on local disk), writes the diff and whitediff images under the storage
    root, and computes the metrics exposed by the get_*() methods.
    """

    def __init__(self, storage_root,
                 expected_image_url, expected_image_locator,
                 actual_image_url, actual_image_locator,
                 expected_images_subdir=DEFAULT_IMAGES_SUBDIR,
                 actual_images_subdir=DEFAULT_IMAGES_SUBDIR,
                 image_suffix=DEFAULT_IMAGE_SUFFIX):
        """Download this pair of images (unless we already have them on local
        disk), and prepare a DiffRecord for them.

        TODO(epoger): Make this asynchronously download images, rather than
        blocking until the images have been downloaded and processed.

        Args:
          storage_root: root directory on local disk within which we store all
              images
          expected_image_url: file or HTTP url from which we will download the
              expected image
          expected_image_locator: a unique ID string under which we will store
              the expected image within storage_root (probably including a
              checksum to guarantee uniqueness)
          actual_image_url: file or HTTP url from which we will download the
              actual image
          actual_image_locator: a unique ID string under which we will store
              the actual image within storage_root (probably including a
              checksum to guarantee uniqueness)
          expected_images_subdir: the subdirectory expected images are stored
              in.
          actual_images_subdir: the subdirectory actual images are stored in.
          image_suffix: the suffix of images.
        """
        expected_image_locator = _sanitize_locator(expected_image_locator)
        actual_image_locator = _sanitize_locator(actual_image_locator)

        # Local paths of the two images. Computed once: they serve both as the
        # download destinations and as the input files handed to skpdiff.
        expected_image_path = os.path.join(
            storage_root, expected_images_subdir,
            str(expected_image_locator) + image_suffix)
        actual_image_path = os.path.join(
            storage_root, actual_images_subdir,
            str(actual_image_locator) + image_suffix)

        # Download the expected/actual images, if we don't have them already.
        # TODO(rmistry): Add a parameter that makes _download_and_open_image
        # raise an exception if images are not found locally (instead of
        # trying to download them).
        expected_image = _download_and_open_image(
            expected_image_path, expected_image_url)
        actual_image = _download_and_open_image(
            actual_image_path, actual_image_url)

        # Generate the diff image (absolute diff at each pixel) and
        # max_diff_per_channel.
        diff_image = _generate_image_diff(actual_image, expected_image)
        diff_histogram = diff_image.histogram()
        (diff_width, diff_height) = diff_image.size
        self._weighted_diff_measure = _calculate_weighted_diff_metric(
            diff_histogram, diff_width * diff_height)
        self._max_diff_per_channel = _max_per_band(diff_histogram)

        # Generate the whitediff image (any differing pixels show as white).
        # This is tricky, because when you convert color images to grayscale
        # or black & white in PIL, it has its own ideas about thresholds.
        # We have to force it: if a pixel has any color at all, it's a '1'.
        bands = diff_image.split()
        graydiff_image = ImageChops.lighter(
            ImageChops.lighter(bands[0], bands[1]), bands[2])
        whitediff_image = (
            graydiff_image.point(lambda p: p > 0 and VALUES_PER_BAND)
            .convert('1', dither=Image.NONE))

        # Calculate the perceptual difference percentage. The attribute is
        # always assigned (possibly to None), so get_perceptual_difference()
        # never fails with AttributeError when skpdiff reports nothing.
        self._perceptual_difference = self._calculate_perceptual_difference(
            expected_image_path, actual_image_path)

        # Final touches on diff_image: use whitediff_image as an alpha mask.
        # Unchanged pixels are transparent; differing pixels are opaque.
        # (This must stay after diff_image.histogram() was taken above.)
        diff_image.putalpha(whitediff_image)

        # Store the diff and whitediff images generated above.
        diff_image_locator = _get_difference_locator(
            expected_image_locator=expected_image_locator,
            actual_image_locator=actual_image_locator)
        basename = str(diff_image_locator) + image_suffix
        _save_image(diff_image, os.path.join(
            storage_root, DIFFS_SUBDIR, basename))
        _save_image(whitediff_image, os.path.join(
            storage_root, WHITEDIFFS_SUBDIR, basename))

        # Calculate difference metrics. The last histogram entry of the
        # whitediff image counts its white (i.e. differing) pixels.
        (self._width, self._height) = diff_image.size
        self._num_pixels_differing = (
            whitediff_image.histogram()[VALUES_PER_BAND - 1])

    @staticmethod
    def _calculate_perceptual_difference(expected_image_path,
                                         actual_image_path):
        """Runs skpdiff on two image files and parses its CSV output.

        Args:
          expected_image_path: path on local disk of the expected image
          actual_image_path: path on local disk of the actual image

        Returns: the perceptual difference percentage taken from the last row
            of skpdiff's CSV output, or None if that output has no rows.
        """
        perceptual_difference = None
        skpdiff_csv_dir = tempfile.mkdtemp()
        try:
            skpdiff_csv_output = os.path.join(
                skpdiff_csv_dir, 'skpdiff-output.csv')
            find_run_binary.run_command(
                [SKPDIFF_BINARY, '-p', expected_image_path, actual_image_path,
                 '--csv', skpdiff_csv_output, '-d', 'perceptual'])
            with open(skpdiff_csv_output) as csv_file:
                for row in csv.DictReader(csv_file):
                    perceptual_similarity = float(row[' perceptual'].strip())
                    if not 0 <= perceptual_similarity <= 1:
                        # skpdiff outputs -1 if the images are different
                        # sizes. Treat any output that does not lie in [0, 1]
                        # as having 0% perceptual similarity.
                        perceptual_similarity = 0
                    # skpdiff returns the perceptual similarity, convert it to
                    # get the perceptual difference percentage.
                    perceptual_difference = 100 - (perceptual_similarity * 100)
        finally:
            # Always discard the scratch directory, even if skpdiff failed.
            shutil.rmtree(skpdiff_csv_dir)
        if perceptual_difference is None:
            logging.warning(
                'skpdiff reported no perceptual result for %s vs %s',
                expected_image_path, actual_image_path)
        return perceptual_difference

    def get_num_pixels_differing(self):
        """Returns the absolute number of pixels that differ."""
        return self._num_pixels_differing

    def get_percent_pixels_differing(self):
        """Returns the percentage of pixels that differ, as a float between
        0 and 100 (inclusive)."""
        return ((float(self._num_pixels_differing) * 100) /
                (self._width * self._height))

    def get_perceptual_difference(self):
        """Returns the perceptual difference percentage, or None if skpdiff
        produced no result for this image pair."""
        return self._perceptual_difference

    def get_weighted_diff_measure(self):
        """Returns a weighted measure of image diffs, as a float between 0 and
        100 (inclusive).

        TODO(epoger): Delete this function, now that we have perceptual diff?
        """
        return self._weighted_diff_measure

    def get_max_diff_per_channel(self):
        """Returns the maximum difference between the expected and actual
        images for each R/G/B channel, as a list."""
        return self._max_diff_per_channel

    def as_dict(self):
        """Returns a dictionary representation of this DiffRecord, as needed
        when constructing the JSON representation."""
        return {
            'numDifferingPixels': self._num_pixels_differing,
            'percentDifferingPixels': self.get_percent_pixels_differing(),
            'weightedDiffMeasure': self.get_weighted_diff_measure(),
            'maxDiffPerChannel': self._max_diff_per_channel,
        }
194
epoger@google.com9dddf6f2013-11-08 16:25:25 +0000195
class ImageDiffDB(object):
    """Calculates differences between image pairs, and keeps the resulting
    DiffRecords (plus the image files under a storage root) for later lookup.
    """

    def __init__(self, storage_root):
        """
        Args:
          storage_root: string; root path within which the DB will store all
              of its files
        """
        # DiffRecords, keyed by (expected_image_locator, actual_image_locator)
        # tuples of sanitized locators.
        self._diff_dict = {}
        self._storage_root = storage_root

    def add_image_pair(self,
                       expected_image_url, expected_image_locator,
                       actual_image_url, actual_image_locator):
        """Download this pair of images (unless we already have them on local
        disk), and prepare a DiffRecord for them.

        If creating the DiffRecord raises an exception, the exception is
        logged and no record is stored for this pair.

        TODO(epoger): Make this asynchronously download images, rather than
        blocking until the images have been downloaded and processed.
        When we do that, we should probably add a new method that will block
        until all of the images have been downloaded and processed.
        Otherwise, we won't know when it's safe to start calling
        get_diff_record().
        jcgregorio notes: maybe just make ImageDiffDB thread-safe and create a
        thread-pool/worker queue at a higher level that just uses ImageDiffDB?

        Args:
          expected_image_url: file or HTTP url from which we will download the
              expected image
          expected_image_locator: a unique ID string under which we will store
              the expected image within storage_root (probably including a
              checksum to guarantee uniqueness)
          actual_image_url: file or HTTP url from which we will download the
              actual image
          actual_image_locator: a unique ID string under which we will store
              the actual image within storage_root (probably including a
              checksum to guarantee uniqueness)
        """
        sanitized_expected = _sanitize_locator(expected_image_locator)
        sanitized_actual = _sanitize_locator(actual_image_locator)
        pair_key = (sanitized_expected, sanitized_actual)

        # Nothing to do if this pair has already been processed.
        if pair_key in self._diff_dict:
            return

        try:
            record = DiffRecord(
                self._storage_root,
                expected_image_url=expected_image_url,
                expected_image_locator=sanitized_expected,
                actual_image_url=actual_image_url,
                actual_image_locator=sanitized_actual)
        except Exception:
            logging.exception('got exception while creating new DiffRecord')
            return
        self._diff_dict[pair_key] = record

    def get_diff_record(self, expected_image_locator, actual_image_locator):
        """Returns the DiffRecord for this image pair.

        Raises a KeyError if we don't have a DiffRecord for this image pair.
        """
        sanitized_expected = _sanitize_locator(expected_image_locator)
        sanitized_actual = _sanitize_locator(actual_image_locator)
        return self._diff_dict[(sanitized_expected, sanitized_actual)]
261
262
263# Utility functions
264
def _calculate_weighted_diff_metric(histogram, num_pixels):
    """Given the histogram of a diff image (per-channel diff at each
    pixel between two images), calculate the weighted diff metric (a
    stab at how different the two images really are).

    TODO(epoger): Delete this function, now that we have perceptual diff?

    Args:
      histogram: PIL histogram of a per-channel diff between two images
      num_pixels: integer; the total number of pixels in the diff image

    Returns: a weighted diff metric, as a float between 0 and 100 (inclusive).

    Raises: ZeroDivisionError if num_pixels is 0 or the histogram is empty.
    """
    # TODO(epoger): As a wild guess at an appropriate metric, weight each
    # different pixel by the square of its delta value. (The more different
    # a pixel is from its expectation, the more we care about it.)
    assert len(histogram) % VALUES_PER_BAND == 0
    # Floor division keeps the band count an integer on Python 2 and 3 alike.
    num_bands = len(histogram) // VALUES_PER_BAND
    max_diff = num_pixels * num_bands * (VALUES_PER_BAND - 1)**2
    # index % VALUES_PER_BAND is the delta value a histogram entry stands for,
    # whichever band the entry belongs to.
    total_diff = sum(
        count * (index % VALUES_PER_BAND)**2
        for index, count in enumerate(histogram))
    return float(100 * total_diff) / max_diff
288
def _max_per_band(histogram):
    """Given the histogram of an image, return the maximum value of each band
    (a.k.a. "color channel", such as R/G/B) across the entire image.

    Args:
      histogram: PIL histogram

    Returns the maximum value of each band within the image histogram, as a
    list. A band whose histogram entries are all zero contributes no entry.
    """
    assert len(histogram) % VALUES_PER_BAND == 0
    # Floor division keeps the band count an integer on Python 2 and 3 alike.
    num_bands = len(histogram) // VALUES_PER_BAND
    max_per_band = []
    for band in range(num_bands):
        # Assuming that VALUES_PER_BAND is 256...
        # the 'R' band makes up indices 0-255 in the histogram,
        # the 'G' band makes up indices 256-511 in the histogram,
        # etc.
        band_start = band * VALUES_PER_BAND
        # Walk down from the band's highest value; the first populated entry
        # is the band's maximum.
        for value in range(VALUES_PER_BAND - 1, -1, -1):
            if histogram[band_start + value] > 0:
                max_per_band.append(value)
                break
    return max_per_band
epoger@google.com9dddf6f2013-11-08 16:25:25 +0000314
def _generate_image_diff(image1, image2):
    """Wrapper for ImageChops.difference(image1, image2) that will handle some
    errors automatically, or at least yield more useful error messages.

    Both images are converted to RGB before they are diffed. If a ValueError
    is raised, it is logged together with both images and then re-raised.

    TODO(epoger): Currently, some of the images generated by the bots are RGBA
    and others are RGB. I'm not sure why that is. For now, to avoid confusion
    within the UI, convert all to RGB when diffing.

    Args:
      image1: a PIL image object
      image2: a PIL image object

    Returns: per-pixel diffs between image1 and image2, as a PIL image object
    """
    try:
        rgb_image1 = image1.convert('RGB')
        rgb_image2 = image2.convert('RGB')
        return ImageChops.difference(rgb_image1, rgb_image2)
    except ValueError:
        details = (repr(image1), repr(image2))
        logging.error('Error diffing image1 [%s] and image2 [%s].' % details)
        raise
335
def _download_and_open_image(local_filepath, url):
    """Open the image at local_filepath; if there is no file at that path,
    download it from url to that path and then open it.

    The download is written to a temporary '.partial' file next to
    local_filepath and renamed into place only once it has completed, so an
    interrupted download never leaves a truncated file that later calls would
    mistake for an already-downloaded image.

    Args:
      local_filepath: path on local disk where the image should be stored
      url: URL from which we can download the image if we don't have it yet

    Returns: a PIL image object
    """
    if not os.path.exists(local_filepath):
        _mkdir_unless_exists(os.path.dirname(local_filepath))
        partial_filepath = local_filepath + '.partial'
        try:
            # NOTE(review): Python 2's urllib.urlopen() appears not to raise
            # on HTTP error statuses, so an error page could still be stored
            # as the image -- confirm, and consider checking
            # url_handle.getcode().
            with contextlib.closing(urllib.urlopen(url)) as url_handle:
                with open(partial_filepath, 'wb') as file_handle:
                    shutil.copyfileobj(fsrc=url_handle, fdst=file_handle)
            os.rename(partial_filepath, local_filepath)
        finally:
            # After a successful rename there is nothing left to remove; after
            # a failure this discards the incomplete download.
            if os.path.exists(partial_filepath):
                os.remove(partial_filepath)
    return _open_image(local_filepath)
352
def _open_image(filepath):
    """Wrapper for Image.open(filepath) that yields more useful error messages.

    An IOError raised while opening the file is logged (including the file
    path) and then re-raised.

    Args:
      filepath: path on local disk to load image from

    Returns: a PIL image object
    """
    try:
        opened_image = Image.open(filepath)
    except IOError:
        message = 'IOError loading image file %s' % filepath
        logging.error(message)
        raise
    return opened_image
366
def _save_image(image, filepath, format='PNG'):
    """Write an image to disk, creating any intermediate directories as needed.

    Args:
      image: a PIL image object
      filepath: path on local disk to write image to
      format: one of the PIL image formats, listed at
          http://effbot.org/imagingbook/formats.htm
    """
    # The directory that will hold the file must exist before saving.
    parent_dir = os.path.dirname(filepath)
    _mkdir_unless_exists(parent_dir)
    image.save(filepath, format)
378
def _mkdir_unless_exists(path):
    """Unless path refers to an already-existing directory, create it
    (including any missing intermediate directories).

    Args:
      path: path on local disk

    Raises: OSError if the directory cannot be created and does not exist
        (for example, if path refers to an existing regular file).
    """
    # Attempt the creation first rather than checking beforehand, so that a
    # directory created by someone else in the meantime is not an error.
    try:
        os.makedirs(path)
    except OSError:
        if not os.path.isdir(path):
            raise
387
def _sanitize_locator(locator):
    """Returns a sanitized version of a locator (one in which we know none of
    the characters will have special meaning in filenames).

    Every character matched by DISALLOWED_FILEPATH_CHAR_REGEX is replaced with
    an underscore.

    Args:
      locator: string, or something that can be represented as a string
    """
    locator_text = str(locator)
    return re.sub(DISALLOWED_FILEPATH_CHAR_REGEX, '_', locator_text)
396
def _get_difference_locator(expected_image_locator, actual_image_locator):
    """Returns the locator string used to look up the diffs between
    expected_image and actual_image.

    Args:
      expected_image_locator: locator string pointing at expected image
      actual_image_locator: locator string pointing at actual image

    Returns: already-sanitized locator where the diffs between expected and
        actual images can be found
    """
    sanitized_expected = _sanitize_locator(expected_image_locator)
    sanitized_actual = _sanitize_locator(actual_image_locator)
    return "%s-vs-%s" % (sanitized_expected, sanitized_actual)