blob: f3347f75afe3f1f5410d13126dfb1cf41001dd79 [file] [log] [blame]
epoger@google.com9dddf6f2013-11-08 16:25:25 +00001#!/usr/bin/python
2
3"""
Copyright 2013 Google Inc.

Use of this source code is governed by a BSD-style license that can be
found in the LICENSE file.

Calculate differences between image pairs, and store them in a database.
10"""
11
12import contextlib
commit-bot@chromium.org44546f82014-02-11 18:21:26 +000013import csv
epoger@google.com9dddf6f2013-11-08 16:25:25 +000014import logging
15import os
commit-bot@chromium.org9985ef52014-02-10 18:19:30 +000016import re
epoger@google.com9dddf6f2013-11-08 16:25:25 +000017import shutil
commit-bot@chromium.org44546f82014-02-11 18:21:26 +000018import sys
19import tempfile
epoger@google.com9dddf6f2013-11-08 16:25:25 +000020import urllib
21try:
22 from PIL import Image, ImageChops
23except ImportError:
24 raise ImportError('Requires PIL to be installed; see '
25 + 'http://www.pythonware.com/products/pil/')
26
# Extend the module search path (sys.path) with the 'tools' directory that
# lives two levels above this file, so that find_run_binary can be imported.
sys.path.append(
    os.path.join(
        os.path.dirname(os.path.realpath(__file__)), os.pardir, os.pardir,
        'tools'))
import find_run_binary

# Location of the skpdiff program, as reported by find_run_binary; resolved
# once, when this module is imported.
SKPDIFF_BINARY = find_run_binary.find_path_to_program('skpdiff')
commit-bot@chromium.org44546f82014-02-11 18:21:26 +000035
# Defaults for where images live under the storage root, and their extension.
DEFAULT_IMAGE_SUFFIX = '.png'
DEFAULT_IMAGES_SUBDIR = 'images'

# Matches every character that is neither a word character nor '-'.
# Written as a raw string so that '\w' and '\-' reach the regex engine as-is
# instead of being treated as (invalid) string escape sequences.
DISALLOWED_FILEPATH_CHAR_REGEX = re.compile(r'[^\w\-]')

# Subdirectories of the storage root that receive the generated diff images.
DIFFS_SUBDIR = 'diffs'
WHITEDIFFS_SUBDIR = 'whitediffs'

# Number of histogram slots per band (color channel).
VALUES_PER_BAND = 256

# Keys used within DiffRecord dictionary representations.
# NOTE: Keep these in sync with static/constants.js
KEY__DIFFERENCE_DATA__MAX_DIFF_PER_CHANNEL = 'maxDiffPerChannel'
KEY__DIFFERENCE_DATA__NUM_DIFF_PIXELS = 'numDifferingPixels'
KEY__DIFFERENCE_DATA__PERCENT_DIFF_PIXELS = 'percentDifferingPixels'
KEY__DIFFERENCE_DATA__PERCEPTUAL_DIFF = 'perceptualDifference'
KEY__DIFFERENCE_DATA__WEIGHTED_DIFF = 'weightedDiffMeasure'
53
epoger@google.com9dddf6f2013-11-08 16:25:25 +000054
class DiffRecord(object):
    """Record of differences between two images."""

    def __init__(self, storage_root,
                 expected_image_url, expected_image_locator,
                 actual_image_url, actual_image_locator,
                 expected_images_subdir=DEFAULT_IMAGES_SUBDIR,
                 actual_images_subdir=DEFAULT_IMAGES_SUBDIR,
                 image_suffix=DEFAULT_IMAGE_SUFFIX):
        """Download this pair of images (unless we already have them on local
        disk), and prepare a DiffRecord for them.

        TODO(epoger): Make this asynchronously download images, rather than
        blocking until the images have been downloaded and processed.

        Args:
          storage_root: root directory on local disk within which we store
              all images
          expected_image_url: file or HTTP url from which we will download
              the expected image
          expected_image_locator: a unique ID string under which we will
              store the expected image within storage_root (probably
              including a checksum to guarantee uniqueness)
          actual_image_url: file or HTTP url from which we will download the
              actual image
          actual_image_locator: a unique ID string under which we will store
              the actual image within storage_root (probably including a
              checksum to guarantee uniqueness)
          expected_images_subdir: the subdirectory expected images are
              stored in.
          actual_images_subdir: the subdirectory actual images are stored in.
          image_suffix: the suffix of images.

        Raises:
          RuntimeError: if skpdiff's CSV output contains no data rows, so no
              perceptual difference could be determined.
        """
        expected_image_locator = _sanitize_locator(expected_image_locator)
        actual_image_locator = _sanitize_locator(actual_image_locator)

        # Local paths of the two images; used both for downloading/opening
        # them and as the inputs handed to skpdiff.
        expected_image_path = os.path.join(
            storage_root, expected_images_subdir,
            str(expected_image_locator) + image_suffix)
        actual_image_path = os.path.join(
            storage_root, actual_images_subdir,
            str(actual_image_locator) + image_suffix)

        # Download the expected/actual images, if we don't have them already.
        # TODO(rmistry): Add a parameter that makes _download_and_open_image
        # raise an exception if images are not found locally (instead of
        # trying to download them).
        expected_image = _download_and_open_image(
            expected_image_path, expected_image_url)
        actual_image = _download_and_open_image(
            actual_image_path, actual_image_url)

        # Generate the diff image (absolute diff at each pixel) and
        # max_diff_per_channel.
        diff_image = _generate_image_diff(actual_image, expected_image)
        diff_histogram = diff_image.histogram()
        (diff_width, diff_height) = diff_image.size
        self._weighted_diff_measure = _calculate_weighted_diff_metric(
            diff_histogram, diff_width * diff_height)
        self._max_diff_per_channel = _max_per_band(diff_histogram)

        # Generate the whitediff image (any differing pixels show as white).
        # This is tricky, because when you convert color images to grayscale
        # or black & white in PIL, it has its own ideas about thresholds.
        # We have to force it: if a pixel has any color at all, it's a '1'.
        bands = diff_image.split()
        graydiff_image = ImageChops.lighter(
            ImageChops.lighter(bands[0], bands[1]), bands[2])
        whitediff_image = (
            graydiff_image.point(lambda p: VALUES_PER_BAND if p > 0 else 0)
            .convert('1', dither=Image.NONE))

        # Calculate the perceptual difference percentage.
        self._perceptual_difference = self._calculate_perceptual_difference(
            expected_image_path, actual_image_path)

        # Final touches on diff_image: use whitediff_image as an alpha mask.
        # Unchanged pixels are transparent; differing pixels are opaque.
        diff_image.putalpha(whitediff_image)

        # Store the diff and whitediff images generated above.
        diff_image_locator = _get_difference_locator(
            expected_image_locator=expected_image_locator,
            actual_image_locator=actual_image_locator)
        basename = str(diff_image_locator) + image_suffix
        _save_image(diff_image, os.path.join(
            storage_root, DIFFS_SUBDIR, basename))
        _save_image(whitediff_image, os.path.join(
            storage_root, WHITEDIFFS_SUBDIR, basename))

        # Calculate difference metrics.
        (self._width, self._height) = diff_image.size
        self._num_pixels_differing = (
            whitediff_image.histogram()[VALUES_PER_BAND - 1])

    @staticmethod
    def _calculate_perceptual_difference(expected_image_path,
                                         actual_image_path):
        """Runs skpdiff on two image files and returns the perceptual
        difference between them, as a percentage.

        Args:
          expected_image_path: path on local disk of the expected image
          actual_image_path: path on local disk of the actual image

        Returns: 100 minus (perceptual similarity * 100), taken from the last
            row of skpdiff's CSV output.

        Raises:
          RuntimeError: if skpdiff's CSV output contains no data rows.
        """
        perceptual_difference = None
        skpdiff_csv_dir = tempfile.mkdtemp()
        try:
            skpdiff_csv_output = os.path.join(
                skpdiff_csv_dir, 'skpdiff-output.csv')
            find_run_binary.run_command(
                [SKPDIFF_BINARY, '-p', expected_image_path, actual_image_path,
                 '--csv', skpdiff_csv_output, '-d', 'perceptual'])
            with open(skpdiff_csv_output) as csv_file:
                for row in csv.DictReader(csv_file):
                    # The column is looked up under ' perceptual' (note the
                    # leading space), and its value is stripped before
                    # parsing.
                    perceptual_similarity = float(row[' perceptual'].strip())
                    if not 0 <= perceptual_similarity <= 1:
                        # skpdiff outputs -1 if the images are different
                        # sizes. Treat any output that does not lie in [0, 1]
                        # as having 0% perceptual similarity.
                        perceptual_similarity = 0
                    # skpdiff returns the perceptual similarity, convert it
                    # to get the perceptual difference percentage.
                    perceptual_difference = 100 - (perceptual_similarity * 100)
        finally:
            # Always remove the scratch directory, even if skpdiff failed.
            shutil.rmtree(skpdiff_csv_dir)

        if perceptual_difference is None:
            # Fail here, with a clear message, rather than leaving the record
            # without a perceptual difference and failing later on access.
            raise RuntimeError(
                'skpdiff produced no perceptual diff data for %s vs %s' % (
                    expected_image_path, actual_image_path))
        return perceptual_difference

    def get_num_pixels_differing(self):
        """Returns the absolute number of pixels that differ."""
        return self._num_pixels_differing

    def get_percent_pixels_differing(self):
        """Returns the percentage of pixels that differ, as a float between
        0 and 100 (inclusive)."""
        num_pixels = self._width * self._height
        if num_pixels == 0:
            # A zero-area image has no pixels that could differ.
            return 0.0
        return (float(self._num_pixels_differing) * 100) / num_pixels

    def get_perceptual_difference(self):
        """Returns the perceptual difference percentage."""
        return self._perceptual_difference

    def get_weighted_diff_measure(self):
        """Returns a weighted measure of image diffs, as a float between 0
        and 100 (inclusive).

        TODO(epoger): Delete this function, now that we have perceptual diff?
        """
        return self._weighted_diff_measure

    def get_max_diff_per_channel(self):
        """Returns the maximum difference between the expected and actual
        images for each R/G/B channel, as a list."""
        return self._max_diff_per_channel

    def as_dict(self):
        """Returns a dictionary representation of this DiffRecord, as needed
        when constructing the JSON representation."""
        return {
            KEY__DIFFERENCE_DATA__NUM_DIFF_PIXELS: self._num_pixels_differing,
            KEY__DIFFERENCE_DATA__PERCENT_DIFF_PIXELS:
                self.get_percent_pixels_differing(),
            KEY__DIFFERENCE_DATA__WEIGHTED_DIFF:
                self.get_weighted_diff_measure(),
            KEY__DIFFERENCE_DATA__MAX_DIFF_PER_CHANNEL:
                self._max_diff_per_channel,
            KEY__DIFFERENCE_DATA__PERCEPTUAL_DIFF: self._perceptual_difference,
        }
204
epoger@google.com9dddf6f2013-11-08 16:25:25 +0000205
class ImageDiffDB(object):
    """Calculates differences between image pairs, maintaining a database of
    them for download."""

    def __init__(self, storage_root):
        """
        Args:
          storage_root: string; root path within which the DB will store all
              of its stuff
        """
        self._storage_root = storage_root

        # Dictionary of DiffRecords, keyed by (expected_image_locator,
        # actual_image_locator) tuples.
        self._diff_dict = {}

    def add_image_pair(self,
                       expected_image_url, expected_image_locator,
                       actual_image_url, actual_image_locator):
        """Download this pair of images (unless we already have them on local
        disk), and prepare a DiffRecord for them.

        If a DiffRecord already exists for this pair, this is a no-op. If
        creating the DiffRecord raises, the exception is logged and no record
        is stored for the pair.

        TODO(epoger): Make this asynchronously download images, rather than
        blocking until the images have been downloaded and processed.
        When we do that, we should probably add a new method that will block
        until all of the images have been downloaded and processed.
        Otherwise, we won't know when it's safe to start calling
        get_diff_record().
        jcgregorio notes: maybe just make ImageDiffDB thread-safe and create
        a thread-pool/worker queue at a higher level that just uses
        ImageDiffDB?

        Args:
          expected_image_url: file or HTTP url from which we will download
              the expected image
          expected_image_locator: a unique ID string under which we will
              store the expected image within storage_root (probably
              including a checksum to guarantee uniqueness)
          actual_image_url: file or HTTP url from which we will download the
              actual image
          actual_image_locator: a unique ID string under which we will store
              the actual image within storage_root (probably including a
              checksum to guarantee uniqueness)
        """
        expected_image_locator = _sanitize_locator(expected_image_locator)
        actual_image_locator = _sanitize_locator(actual_image_locator)
        key = (expected_image_locator, actual_image_locator)
        if key in self._diff_dict:
            # Already computed for this pair; nothing to do.
            return

        try:
            new_diff_record = DiffRecord(
                self._storage_root,
                expected_image_url=expected_image_url,
                expected_image_locator=expected_image_locator,
                actual_image_url=actual_image_url,
                actual_image_locator=actual_image_locator)
        except Exception:
            # Best effort: one bad pair is logged and skipped.
            logging.exception('got exception while creating new DiffRecord')
            return
        self._diff_dict[key] = new_diff_record

    def get_diff_record(self, expected_image_locator, actual_image_locator):
        """Returns the DiffRecord for this image pair.

        Raises a KeyError if we don't have a DiffRecord for this image pair.
        """
        key = (_sanitize_locator(expected_image_locator),
               _sanitize_locator(actual_image_locator))
        return self._diff_dict[key]
271
272
273# Utility functions
274
def _calculate_weighted_diff_metric(histogram, num_pixels):
    """Given the histogram of a diff image (per-channel diff at each
    pixel between two images), calculate the weighted diff metric (a
    stab at how different the two images really are).

    TODO(epoger): Delete this function, now that we have perceptual diff?

    Args:
      histogram: PIL histogram of a per-channel diff between two images
      num_pixels: integer; the total number of pixels in the diff image

    Returns: a weighted diff metric, as a float between 0 and 100
        (inclusive). Returns 0.0 when there are no pixels or no bands.
    """
    # TODO(epoger): As a wild guess at an appropriate metric, weight each
    # different pixel by the square of its delta value. (The more different
    # a pixel is from its expectation, the more we care about it.)
    assert len(histogram) % VALUES_PER_BAND == 0
    # Floor division keeps num_bands an integer regardless of how '/' behaves.
    num_bands = len(histogram) // VALUES_PER_BAND
    max_diff = num_pixels * num_bands * (VALUES_PER_BAND - 1)**2
    if max_diff == 0:
        # Nothing to compare, so nothing differs; also avoids dividing by 0.
        return 0.0
    # (index % VALUES_PER_BAND) is the delta value that slot stands for
    # within its band; count is how many pixels had that delta.
    total_diff = sum(
        count * (index % VALUES_PER_BAND)**2
        for index, count in enumerate(histogram))
    return float(100 * total_diff) / max_diff
298
commit-bot@chromium.org16f41802014-02-26 19:05:20 +0000299
def _max_per_band(histogram):
    """Given the histogram of an image, return the maximum value of each band
    (a.k.a. "color channel", such as R/G/B) across the entire image.

    Args:
      histogram: PIL histogram

    Returns the maximum value of each band within the image histogram, as a
    list with exactly one entry per band. A band whose histogram slots are
    all zero contributes 0, so entries always line up with their bands.
    """
    assert len(histogram) % VALUES_PER_BAND == 0
    # Floor division keeps num_bands an integer regardless of how '/' behaves.
    num_bands = len(histogram) // VALUES_PER_BAND
    max_per_band = []
    for band in range(num_bands):
        # Assuming that VALUES_PER_BAND is 256...
        # the 'R' band makes up indices 0-255 in the histogram,
        # the 'G' band makes up indices 256-511 in the histogram,
        # etc.
        min_index = band * VALUES_PER_BAND
        band_max = 0
        # Scan from the highest value downwards; the first populated slot
        # is this band's maximum.
        for value in range(VALUES_PER_BAND - 1, -1, -1):
            if histogram[min_index + value] > 0:
                band_max = value
                break
        max_per_band.append(band_max)
    return max_per_band
epoger@google.com9dddf6f2013-11-08 16:25:25 +0000325
commit-bot@chromium.org16f41802014-02-26 19:05:20 +0000326
def _generate_image_diff(image1, image2):
    """Computes the per-pixel difference of two images via
    ImageChops.difference(), logging a more useful message if that fails.

    TODO(epoger): Currently, some of the images generated by the bots are
    RGBA and others are RGB. I'm not sure why that is. For now, to avoid
    confusion within the UI, convert all to RGB when diffing.

    Args:
      image1: a PIL image object
      image2: a PIL image object

    Returns: per-pixel diffs between image1 and image2, as a PIL image object

    Raises:
      ValueError: re-raised after being logged, if the conversion or the
          diff raises it.
    """
    try:
        rgb_image1 = image1.convert('RGB')
        rgb_image2 = image2.convert('RGB')
        return ImageChops.difference(rgb_image1, rgb_image2)
    except ValueError:
        message = 'Error diffing image1 [%s] and image2 [%s].' % (
            repr(image1), repr(image2))
        logging.error(message)
        raise
347
commit-bot@chromium.org16f41802014-02-26 19:05:20 +0000348
def _download_and_open_image(local_filepath, url):
    """Open the image at local_filepath; if there is no file at that path,
    download it from url to that path and then open it.

    The download is written to a temporary '.partial' file next to
    local_filepath and renamed into place only after it has completed, so a
    failed or interrupted download never leaves a truncated file that later
    calls would mistake for an already-downloaded image.

    Args:
      local_filepath: path on local disk where the image should be stored
      url: URL from which we can download the image if we don't have it yet

    Returns: a PIL image object

    Raises:
      IOError: if the server answers with an HTTP error status (>= 400), or
          if the image cannot be downloaded or opened.
    """
    if not os.path.exists(local_filepath):
        _mkdir_unless_exists(os.path.dirname(local_filepath))
        partial_filepath = local_filepath + '.partial'
        try:
            with contextlib.closing(urllib.urlopen(url)) as url_handle:
                # urllib.urlopen() hands back the body of an HTTP error page
                # rather than raising, so check the status ourselves.
                # getcode() is None for non-HTTP (e.g. file) URLs.
                status = url_handle.getcode()
                if status is not None and status >= 400:
                    raise IOError('HTTP status %s downloading %s' % (
                        status, url))
                with open(partial_filepath, 'wb') as file_handle:
                    shutil.copyfileobj(fsrc=url_handle, fdst=file_handle)
            os.rename(partial_filepath, local_filepath)
        finally:
            # After a successful rename the partial file is gone; if it is
            # still around, the download failed part-way, so discard it.
            if os.path.exists(partial_filepath):
                os.remove(partial_filepath)
    return _open_image(local_filepath)
365
commit-bot@chromium.org16f41802014-02-26 19:05:20 +0000366
def _open_image(filepath):
    """Opens an image file with Image.open(), logging the offending path if
    that fails.

    Args:
      filepath: path on local disk to load image from

    Returns: a PIL image object

    Raises:
      IOError: re-raised after being logged, if Image.open() raises it.
    """
    try:
        opened_image = Image.open(filepath)
    except IOError:
        logging.error('IOError loading image file %s' % filepath)
        raise
    return opened_image
380
commit-bot@chromium.org16f41802014-02-26 19:05:20 +0000381
def _save_image(image, filepath, format='PNG'):
    """Write an image to disk, creating any intermediate directories as needed.

    Args:
      image: a PIL image object
      filepath: path on local disk to write image to
      format: one of the PIL image formats, listed at
          http://effbot.org/imagingbook/formats.htm
    """
    parent_dir = os.path.dirname(filepath)
    # A bare filename has an empty dirname: the file goes into the current
    # directory and there is nothing to create (os.makedirs('') would raise).
    if parent_dir:
        _mkdir_unless_exists(parent_dir)
    image.save(filepath, format)
393
commit-bot@chromium.org16f41802014-02-26 19:05:20 +0000394
def _mkdir_unless_exists(path):
    """Unless path refers to an already-existing directory, create it
    (including any missing intermediate directories).

    An empty path is treated as "nothing to create".

    Args:
      path: path on local disk

    Raises:
      OSError: if the directory could not be created and still does not
          exist afterwards.
    """
    if not path or os.path.isdir(path):
        return
    try:
        os.makedirs(path)
    except OSError:
        # Someone else may have created the directory between the isdir()
        # check above and makedirs(); that is fine. Anything else is a real
        # failure.
        if not os.path.isdir(path):
            raise
403
commit-bot@chromium.org16f41802014-02-26 19:05:20 +0000404
def _sanitize_locator(locator):
    """Returns a sanitized version of a locator: its string form, with every
    character matched by DISALLOWED_FILEPATH_CHAR_REGEX replaced by '_', so
    that none of the characters will have special meaning in filenames.

    Args:
      locator: string, or something that can be represented as a string
    """
    locator_text = str(locator)
    return DISALLOWED_FILEPATH_CHAR_REGEX.sub('_', locator_text)
413
commit-bot@chromium.org16f41802014-02-26 19:05:20 +0000414
def _get_difference_locator(expected_image_locator, actual_image_locator):
    """Builds the locator string under which the diffs between expected_image
    and actual_image are looked up.

    We must keep this function in sync with getImageDiffRelativeUrl() in
    static/loader.js

    Args:
      expected_image_locator: locator string pointing at expected image
      actual_image_locator: locator string pointing at actual image

    Returns: already-sanitized locator where the diffs between expected and
        actual images can be found
    """
    sanitized_expected = _sanitize_locator(expected_image_locator)
    sanitized_actual = _sanitize_locator(actual_image_locator)
    return "%s-vs-%s" % (sanitized_expected, sanitized_actual)