blob: 3b1eb3ebc032a804b444c453dd28f9ed4cc06307 [file] [log] [blame]
epoger@google.com9dddf6f2013-11-08 16:25:25 +00001#!/usr/bin/python
2
3"""
4Copyright 2013 Google Inc.
5
6Use of this source code is governed by a BSD-style license that can be
7found in the LICENSE file.
8
9Calculate differences between image pairs, and store them in a database.
10"""
11
12import contextlib
commit-bot@chromium.org44546f82014-02-11 18:21:26 +000013import csv
epoger@google.com9dddf6f2013-11-08 16:25:25 +000014import logging
15import os
commit-bot@chromium.org9985ef52014-02-10 18:19:30 +000016import re
epoger@google.com9dddf6f2013-11-08 16:25:25 +000017import shutil
commit-bot@chromium.org44546f82014-02-11 18:21:26 +000018import sys
19import tempfile
epoger@google.com9dddf6f2013-11-08 16:25:25 +000020import urllib
21try:
22 from PIL import Image, ImageChops
23except ImportError:
24 raise ImportError('Requires PIL to be installed; see '
25 + 'http://www.pythonware.com/products/pil/')
26
commit-bot@chromium.org44546f82014-02-11 18:21:26 +000027# Set the PYTHONPATH to include the tools directory.
28sys.path.append(
29 os.path.join(
30 os.path.dirname(os.path.realpath(__file__)), os.pardir, os.pardir,
31 'tools'))
32import find_run_binary
33
commit-bot@chromium.org4d0f0082014-02-18 14:38:22 +000034SKPDIFF_BINARY = find_run_binary.find_path_to_program('skpdiff')
commit-bot@chromium.org44546f82014-02-11 18:21:26 +000035
# Defaults for where (and with what suffix) downloaded images are stored
# beneath the storage root.
DEFAULT_IMAGE_SUFFIX = '.png'
DEFAULT_IMAGES_SUBDIR = 'images'

# Matches every character that is neither a word character (\w) nor '-'.
# _sanitize_locator() replaces each match with '_'.
# Raw string, so the backslash escapes reach the regex engine untouched.
DISALLOWED_FILEPATH_CHAR_REGEX = re.compile(r'[^\w\-]')

# Subdirectories of the storage root that receive the generated diff images.
DIFFS_SUBDIR = 'diffs'
WHITEDIFFS_SUBDIR = 'whitediffs'

# Number of histogram slots PIL reports per band (color channel).
VALUES_PER_BAND = 256

# Keys used within DiffRecord dictionary representations.
# NOTE: Keep these in sync with static/constants.js
KEY__DIFFERENCE_DATA__MAX_DIFF_PER_CHANNEL = 'maxDiffPerChannel'
KEY__DIFFERENCE_DATA__NUM_DIFF_PIXELS = 'numDifferingPixels'
KEY__DIFFERENCE_DATA__PERCENT_DIFF_PIXELS = 'percentDifferingPixels'
KEY__DIFFERENCE_DATA__PERCEPTUAL_DIFF = 'perceptualDifference'
KEY__DIFFERENCE_DATA__WEIGHTED_DIFF = 'weightedDiffMeasure'
53
epoger@google.com9dddf6f2013-11-08 16:25:25 +000054
class DiffRecord(object):
  """Record of differences between two images.

  Constructing a DiffRecord fetches both images (unless they are already on
  local disk), writes a diff image and a whitediff image beneath storage_root,
  and computes the metrics exposed by the getters and by as_dict().
  """

  def __init__(self, storage_root,
               expected_image_url, expected_image_locator,
               actual_image_url, actual_image_locator,
               expected_images_subdir=DEFAULT_IMAGES_SUBDIR,
               actual_images_subdir=DEFAULT_IMAGES_SUBDIR,
               image_suffix=DEFAULT_IMAGE_SUFFIX):
    """Download this pair of images (unless we already have them on local disk),
    and prepare a DiffRecord for them.

    TODO(epoger): Make this asynchronously download images, rather than blocking
    until the images have been downloaded and processed.

    Args:
      storage_root: root directory on local disk within which we store all
          images
      expected_image_url: file or HTTP url from which we will download the
          expected image
      expected_image_locator: a unique ID string under which we will store the
          expected image within storage_root (probably including a checksum to
          guarantee uniqueness)
      actual_image_url: file or HTTP url from which we will download the
          actual image
      actual_image_locator: a unique ID string under which we will store the
          actual image within storage_root (probably including a checksum to
          guarantee uniqueness)
      expected_images_subdir: the subdirectory expected images are stored in.
      actual_images_subdir: the subdirectory actual images are stored in.
      image_suffix: the suffix of images.

    Raises:
      Exception: any failure to download/open either image is logged and then
          re-raised; failures in the later steps propagate unlogged.
      ValueError: if skpdiff's CSV output contains no result rows.
    """
    expected_image_locator = _sanitize_locator(expected_image_locator)
    actual_image_locator = _sanitize_locator(actual_image_locator)

    # Download the expected/actual images, if we don't have them already.
    # TODO(rmistry): Add a parameter that makes _download_and_open_image raise
    # an exception if images are not found locally (instead of trying to
    # download them).
    expected_image_file = os.path.join(
        storage_root, expected_images_subdir,
        str(expected_image_locator) + image_suffix)
    actual_image_file = os.path.join(
        storage_root, actual_images_subdir,
        str(actual_image_locator) + image_suffix)
    try:
      expected_image = _download_and_open_image(
          expected_image_file, expected_image_url)
    except Exception:
      logging.exception('unable to download expected_image_url %s to file %s' %
                        (expected_image_url, expected_image_file))
      raise
    try:
      actual_image = _download_and_open_image(
          actual_image_file, actual_image_url)
    except Exception:
      logging.exception('unable to download actual_image_url %s to file %s' %
                        (actual_image_url, actual_image_file))
      raise

    # Generate the diff image (absolute diff at each pixel) and
    # max_diff_per_channel.
    diff_image = _generate_image_diff(actual_image, expected_image)
    diff_histogram = diff_image.histogram()
    (diff_width, diff_height) = diff_image.size
    self._weighted_diff_measure = _calculate_weighted_diff_metric(
        diff_histogram, diff_width * diff_height)
    self._max_diff_per_channel = _max_per_band(diff_histogram)

    # Generate the whitediff image (any differing pixels show as white).
    # This is tricky, because when you convert color images to grayscale or
    # black & white in PIL, it has its own ideas about thresholds.
    # We have to force it: if a pixel has any color at all, it's a '1'.
    bands = diff_image.split()
    graydiff_image = ImageChops.lighter(ImageChops.lighter(
        bands[0], bands[1]), bands[2])
    whitediff_image = (graydiff_image.point(lambda p: p > 0 and VALUES_PER_BAND)
                       .convert('1', dither=Image.NONE))

    # Calculate the perceptual difference percentage. skpdiff is handed the
    # same on-disk files that were opened above.
    self._perceptual_difference = self._calculate_perceptual_difference(
        expected_image_file, actual_image_file)

    # Final touches on diff_image: use whitediff_image as an alpha mask.
    # Unchanged pixels are transparent; differing pixels are opaque.
    diff_image.putalpha(whitediff_image)

    # Store the diff and whitediff images generated above.
    diff_image_locator = _get_difference_locator(
        expected_image_locator=expected_image_locator,
        actual_image_locator=actual_image_locator)
    basename = str(diff_image_locator) + image_suffix
    _save_image(diff_image, os.path.join(
        storage_root, DIFFS_SUBDIR, basename))
    _save_image(whitediff_image, os.path.join(
        storage_root, WHITEDIFFS_SUBDIR, basename))

    # Calculate difference metrics.
    (self._width, self._height) = diff_image.size
    self._num_pixels_differing = (
        whitediff_image.histogram()[VALUES_PER_BAND - 1])

  @staticmethod
  def _calculate_perceptual_difference(expected_image_file, actual_image_file):
    """Runs skpdiff on two image files and returns their perceptual difference.

    Args:
      expected_image_file: path on local disk of the expected image
      actual_image_file: path on local disk of the actual image

    Returns: the perceptual difference percentage, as a float between 0 and 100
        (inclusive). If skpdiff writes several rows, the last one wins.

    Raises:
      ValueError: if skpdiff's CSV output contains no result rows.
    """
    perceptual_difference = None
    skpdiff_csv_dir = tempfile.mkdtemp()
    try:
      skpdiff_csv_output = os.path.join(skpdiff_csv_dir, 'skpdiff-output.csv')
      find_run_binary.run_command(
          [SKPDIFF_BINARY, '-p', expected_image_file, actual_image_file,
           '--csv', skpdiff_csv_output, '-d', 'perceptual'])
      with open(skpdiff_csv_output) as csv_file:
        for row in csv.DictReader(csv_file):
          perceptual_similarity = float(row[' perceptual'].strip())
          if not 0 <= perceptual_similarity <= 1:
            # skpdiff outputs -1 if the images are different sizes. Treat any
            # output that does not lie in [0, 1] as having 0% perceptual
            # similarity.
            perceptual_similarity = 0
          # skpdiff returns the perceptual similarity, convert it to get the
          # perceptual difference percentage.
          perceptual_difference = 100 - (perceptual_similarity * 100)
    finally:
      # Always discard the scratch directory, even if skpdiff failed.
      shutil.rmtree(skpdiff_csv_dir)
    if perceptual_difference is None:
      # Fail here rather than leaving the record without this metric, which
      # would only surface later as an AttributeError in the getters.
      raise ValueError(
          'skpdiff produced no results for expected image %s and actual '
          'image %s' % (expected_image_file, actual_image_file))
    return perceptual_difference

  def get_num_pixels_differing(self):
    """Returns the absolute number of pixels that differ."""
    return self._num_pixels_differing

  def get_percent_pixels_differing(self):
    """Returns the percentage of pixels that differ, as a float between
    0 and 100 (inclusive)."""
    return ((float(self._num_pixels_differing) * 100) /
            (self._width * self._height))

  def get_perceptual_difference(self):
    """Returns the perceptual difference percentage."""
    return self._perceptual_difference

  def get_weighted_diff_measure(self):
    """Returns a weighted measure of image diffs, as a float between 0 and 100
    (inclusive).

    TODO(epoger): Delete this function, now that we have perceptual diff?
    """
    return self._weighted_diff_measure

  def get_max_diff_per_channel(self):
    """Returns the maximum difference between the expected and actual images
    for each R/G/B channel, as a list."""
    return self._max_diff_per_channel

  def as_dict(self):
    """Returns a dictionary representation of this DiffRecord, as needed when
    constructing the JSON representation."""
    return {
        KEY__DIFFERENCE_DATA__NUM_DIFF_PIXELS: self._num_pixels_differing,
        KEY__DIFFERENCE_DATA__PERCENT_DIFF_PIXELS:
            self.get_percent_pixels_differing(),
        KEY__DIFFERENCE_DATA__WEIGHTED_DIFF: self.get_weighted_diff_measure(),
        KEY__DIFFERENCE_DATA__MAX_DIFF_PER_CHANNEL: self._max_diff_per_channel,
        KEY__DIFFERENCE_DATA__PERCEPTUAL_DIFF: self._perceptual_difference,
    }
216
epoger@google.com9dddf6f2013-11-08 16:25:25 +0000217
class ImageDiffDB(object):
  """Calculates differences between image pairs, maintaining a database of
  them for download."""

  def __init__(self, storage_root):
    """
    Args:
      storage_root: string; root path within which the DB will store all of
          its stuff
    """
    self._storage_root = storage_root

    # Dictionary of DiffRecords, keyed by (expected_image_locator,
    # actual_image_locator) tuples. A value of None records that we tried to
    # build a DiffRecord for that pair and failed.
    self._diff_dict = {}

  def add_image_pair(self,
                     expected_image_url, expected_image_locator,
                     actual_image_url, actual_image_locator):
    """Download this pair of images (unless we already have them on local disk),
    and prepare a DiffRecord for them.

    Does nothing if this pair of locators has been added before. If the
    DiffRecord cannot be created, the exception is logged and None is stored
    for the pair instead of propagating the error.

    TODO(epoger): Make this asynchronously download images, rather than blocking
    until the images have been downloaded and processed.
    When we do that, we should probably add a new method that will block
    until all of the images have been downloaded and processed.  Otherwise,
    we won't know when it's safe to start calling get_diff_record().
    jcgregorio notes: maybe just make ImageDiffDB thread-safe and create a
    thread-pool/worker queue at a higher level that just uses ImageDiffDB?

    Args:
      expected_image_url: file or HTTP url from which we will download the
          expected image
      expected_image_locator: a unique ID string under which we will store the
          expected image within storage_root (probably including a checksum to
          guarantee uniqueness)
      actual_image_url: file or HTTP url from which we will download the
          actual image
      actual_image_locator: a unique ID string under which we will store the
          actual image within storage_root (probably including a checksum to
          guarantee uniqueness)
    """
    expected_image_locator = _sanitize_locator(expected_image_locator)
    actual_image_locator = _sanitize_locator(actual_image_locator)
    key = (expected_image_locator, actual_image_locator)
    if key not in self._diff_dict:
      try:
        new_diff_record = DiffRecord(
            self._storage_root,
            expected_image_url=expected_image_url,
            expected_image_locator=expected_image_locator,
            actual_image_url=actual_image_url,
            actual_image_locator=actual_image_locator)
      except Exception:
        # If we can't create a real DiffRecord for this (expected, actual) pair,
        # store None and the UI will show whatever information we DO have.
        # Fixes http://skbug.com/2368 .
        logging.exception(
            'got exception while creating a DiffRecord for '
            'expected_image_url=%s , actual_image_url=%s; returning None' % (
                expected_image_url, actual_image_url))
        new_diff_record = None
      self._diff_dict[key] = new_diff_record

  def get_diff_record(self, expected_image_locator, actual_image_locator):
    """Returns the DiffRecord for this image pair.

    The result is None if add_image_pair() failed to create a DiffRecord for
    the pair.

    Raises a KeyError if we don't have a DiffRecord for this image pair.
    """
    key = (_sanitize_locator(expected_image_locator),
           _sanitize_locator(actual_image_locator))
    return self._diff_dict[key]
epoger@google.com9dddf6f2013-11-08 16:25:25 +0000289
290
291# Utility functions
292
def _calculate_weighted_diff_metric(histogram, num_pixels):
  """Given the histogram of a diff image (per-channel diff at each
  pixel between two images), calculate the weighted diff metric (a
  stab at how different the two images really are).

  TODO(epoger): Delete this function, now that we have perceptual diff?

  Args:
    histogram: PIL histogram of a per-channel diff between two images; its
        length must be a multiple of VALUES_PER_BAND
    num_pixels: integer; the total number of pixels in the diff image

  Returns: a weighted diff metric, as a float between 0 and 100 (inclusive).
      Returns 0.0 when there is nothing to compare (no pixels or no bands).
  """
  # TODO(epoger): As a wild guess at an appropriate metric, weight each
  # different pixel by the square of its delta value.  (The more different
  # a pixel is from its expectation, the more we care about it.)
  assert len(histogram) % VALUES_PER_BAND == 0
  num_bands = len(histogram) // VALUES_PER_BAND
  max_diff = num_pixels * num_bands * (VALUES_PER_BAND - 1)**2
  if max_diff == 0:
    # A zero-pixel image has no differences; avoid dividing by zero.
    return 0.0
  # index % VALUES_PER_BAND is the delta value that a histogram slot counts,
  # regardless of which band the slot belongs to.
  total_diff = sum(count * (index % VALUES_PER_BAND)**2
                   for index, count in enumerate(histogram))
  return float(100 * total_diff) / max_diff
316
commit-bot@chromium.org16f41802014-02-26 19:05:20 +0000317
def _max_per_band(histogram):
  """Given the histogram of an image, return the maximum value of each band
  (a.k.a. "color channel", such as R/G/B) across the entire image.

  Args:
    histogram: PIL histogram; its length must be a multiple of VALUES_PER_BAND

  Returns the maximum value of each band within the image histogram, as a list
  with exactly one entry per band. A band whose histogram is entirely zero
  (e.g. for a zero-pixel image) reports 0, so the entries always line up with
  the bands.
  """
  assert len(histogram) % VALUES_PER_BAND == 0
  max_per_band = []
  # Assuming that VALUES_PER_BAND is 256...
  #  the 'R' band makes up indices 0-255 in the histogram,
  #  the 'G' band makes up indices 256-511 in the histogram,
  #  etc.
  for min_index in range(0, len(histogram), VALUES_PER_BAND):
    band_max = 0
    # Scan downward from the largest value; the first populated slot is the
    # band's maximum.
    for value in range(VALUES_PER_BAND - 1, -1, -1):
      if histogram[min_index + value] > 0:
        band_max = value
        break
    max_per_band.append(band_max)
  return max_per_band
epoger@google.com9dddf6f2013-11-08 16:25:25 +0000343
commit-bot@chromium.org16f41802014-02-26 19:05:20 +0000344
def _generate_image_diff(image1, image2):
  """Wrapper for ImageChops.difference(image1, image2) that will handle some
  errors automatically, or at least yield more useful error messages.

  TODO(epoger): Currently, some of the images generated by the bots are RGBA
  and others are RGB.  I'm not sure why that is.  For now, to avoid confusion
  within the UI, convert all to RGB when diffing.

  Args:
    image1: a PIL image object
    image2: a PIL image object

  Returns: per-pixel diffs between image1 and image2, as a PIL image object

  Raises:
    ValueError: if the diff raises it; both images are logged first.
  """
  try:
    return ImageChops.difference(image1.convert('RGB'), image2.convert('RGB'))
  except ValueError:
    logging.error('Error diffing image1 [%s] and image2 [%s].' % (
        repr(image1), repr(image2)))
    raise
365
commit-bot@chromium.org16f41802014-02-26 19:05:20 +0000366
def _download_and_open_image(local_filepath, url):
  """Open the image at local_filepath; if there is no file at that path,
  download it from url to that path and then open it.

  If the download fails partway through, any partially written file is
  removed before the exception propagates, so that the next call downloads
  the image again instead of opening a truncated file.

  Args:
    local_filepath: path on local disk where the image should be stored
    url: URL from which we can download the image if we don't have it yet

  Returns: a PIL image object
  """
  if not os.path.exists(local_filepath):
    _mkdir_unless_exists(os.path.dirname(local_filepath))
    try:
      with contextlib.closing(urllib.urlopen(url)) as url_handle:
        with open(local_filepath, 'wb') as file_handle:
          shutil.copyfileobj(fsrc=url_handle, fdst=file_handle)
    except Exception:
      # Don't leave an incomplete download behind: its mere existence would
      # make every later call skip the download.
      if os.path.exists(local_filepath):
        os.remove(local_filepath)
      raise
  return _open_image(local_filepath)
383
commit-bot@chromium.org16f41802014-02-26 19:05:20 +0000384
def _open_image(filepath):
  """Wrapper for Image.open(filepath) that yields more useful error messages.

  Args:
    filepath: path on local disk to load image from

  Returns: a PIL image object

  Raises:
    IOError: if the image cannot be loaded; the file is deleted before the
        error is re-raised.
  """
  try:
    return Image.open(filepath)
  except IOError:
    # If we are unable to load an image from the file, delete it from disk
    # and we will try to fetch it again next time.  Fixes http://skbug.com/2247
    logging.error('IOError loading image file %s ; deleting it.' % filepath)
    os.remove(filepath)
    raise
401
commit-bot@chromium.org16f41802014-02-26 19:05:20 +0000402
def _save_image(image, filepath, format='PNG'):
  """Write an image to disk, creating any intermediate directories as needed.

  Args:
    image: a PIL image object
    filepath: path on local disk to write image to
    format: one of the PIL image formats, listed at
        http://effbot.org/imagingbook/formats.htm
  """
  # The parameter name 'format' shadows the builtin, but it is part of the
  # signature callers may use by keyword, so it stays.
  _mkdir_unless_exists(os.path.dirname(filepath))
  image.save(filepath, format)
414
commit-bot@chromium.org16f41802014-02-26 19:05:20 +0000415
def _mkdir_unless_exists(path):
  """Unless path refers to an already-existing directory, create it.

  An empty path (what os.path.dirname() returns for a bare filename) is
  treated as "nothing to create".

  Args:
    path: path on local disk

  Raises:
    OSError: if the directory cannot be created and does not exist afterwards
        (for example, because path names an existing regular file).
  """
  if not path or os.path.isdir(path):
    return
  try:
    os.makedirs(path)
  except OSError:
    # Someone else may have created the directory between the isdir() check
    # and makedirs(); that is fine. Anything else is a real failure.
    if not os.path.isdir(path):
      raise
424
commit-bot@chromium.org16f41802014-02-26 19:05:20 +0000425
def _sanitize_locator(locator):
  """Returns a sanitized version of a locator (one in which we know none of the
  characters will have special meaning in filenames).

  Every character matched by DISALLOWED_FILEPATH_CHAR_REGEX is replaced with
  an underscore.

  Args:
    locator: string, or something that can be represented as a string
  """
  return DISALLOWED_FILEPATH_CHAR_REGEX.sub('_', str(locator))
434
commit-bot@chromium.org16f41802014-02-26 19:05:20 +0000435
def _get_difference_locator(expected_image_locator, actual_image_locator):
  """Returns the locator string used to look up the diffs between expected_image
  and actual_image.

  We must keep this function in sync with getImageDiffRelativeUrl() in
  static/loader.js

  Args:
    expected_image_locator: locator string pointing at expected image
    actual_image_locator: locator string pointing at actual image

  Returns: already-sanitized locator where the diffs between expected and
      actual images can be found
  """
  return "%s-vs-%s" % (_sanitize_locator(expected_image_locator),
                       _sanitize_locator(actual_image_locator))