blob: 936301e1cdef393e736ad852283768482037a63c [file] [log] [blame]
epoger@google.com9dddf6f2013-11-08 16:25:25 +00001#!/usr/bin/python
2
3"""
4Copyright 2013 Google Inc.
5
6Use of this source code is governed by a BSD-style license that can be
7found in the LICENSE file.
8
9Calculate differences between image pairs, and store them in a database.
10"""
11
12import contextlib
13import logging
14import os
commit-bot@chromium.org9985ef52014-02-10 18:19:30 +000015import re
epoger@google.com9dddf6f2013-11-08 16:25:25 +000016import shutil
17import urllib
18try:
19 from PIL import Image, ImageChops
20except ImportError:
21 raise ImportError('Requires PIL to be installed; see '
22 + 'http://www.pythonware.com/products/pil/')
23
# Default filename suffix and storage subdirectory for downloaded images.
DEFAULT_IMAGE_SUFFIX = '.png'
DEFAULT_IMAGES_SUBDIR = 'images'

# Matches every character that is NOT a word character or a hyphen; such
# characters are replaced when a locator is turned into a filename.
# Raw string, so the backslashes reach the regex engine untouched instead of
# relying on Python passing unknown string escapes through.
DISALLOWED_FILEPATH_CHAR_REGEX = re.compile(r'[^\w\-]')

# Subdirectories (within the storage root) for the generated diff images.
DIFFS_SUBDIR = 'diffs'
WHITEDIFFS_SUBDIR = 'whitediffs'

# Number of histogram buckets PIL reports per 8-bit band (color channel).
VALUES_PER_BAND = 256
33
epoger@google.com9dddf6f2013-11-08 16:25:25 +000034
class DiffRecord(object):
    """Record of differences between two images.

    Constructing a DiffRecord fetches both images (unless they are already
    on local disk), writes a diff image and a "whitediff" image beneath
    storage_root, and remembers the resulting difference metrics.
    """

    def __init__(self, storage_root,
                 expected_image_url, expected_image_locator,
                 actual_image_url, actual_image_locator,
                 expected_images_subdir=DEFAULT_IMAGES_SUBDIR,
                 actual_images_subdir=DEFAULT_IMAGES_SUBDIR,
                 image_suffix=DEFAULT_IMAGE_SUFFIX):
        """Download this pair of images (unless we already have them on local
        disk), and prepare a DiffRecord for them.

        TODO(epoger): Make this asynchronously download images, rather than
        blocking until the images have been downloaded and processed.

        Args:
          storage_root: root directory on local disk within which we store
              all images
          expected_image_url: file or HTTP url from which we will download
              the expected image
          expected_image_locator: a unique ID string under which we will
              store the expected image within storage_root (probably
              including a checksum to guarantee uniqueness)
          actual_image_url: file or HTTP url from which we will download the
              actual image
          actual_image_locator: a unique ID string under which we will store
              the actual image within storage_root (probably including a
              checksum to guarantee uniqueness)
          expected_images_subdir: the subdirectory expected images are
              stored in.
          actual_images_subdir: the subdirectory actual images are stored in.
          image_suffix: the suffix of images.
        """
        # _sanitize_locator() always hands back a plain string, so the
        # locators can be concatenated with image_suffix directly below.
        expected_image_locator = _sanitize_locator(expected_image_locator)
        actual_image_locator = _sanitize_locator(actual_image_locator)

        # Download the expected/actual images, if we don't have them already.
        # TODO(rmistry): Add a parameter that makes _download_and_open_image
        # raise an exception if images are not found locally (instead of
        # trying to download them).
        expected_image = _download_and_open_image(
            os.path.join(storage_root, expected_images_subdir,
                         expected_image_locator + image_suffix),
            expected_image_url)
        actual_image = _download_and_open_image(
            os.path.join(storage_root, actual_images_subdir,
                         actual_image_locator + image_suffix),
            actual_image_url)

        # Generate the diff image (absolute diff at each pixel) and derive
        # the histogram-based metrics from it.  This must happen before the
        # alpha channel is added to diff_image further down.
        diff_image = _generate_image_diff(actual_image, expected_image)
        diff_histogram = diff_image.histogram()
        (self._width, self._height) = diff_image.size
        self._weighted_diff_measure = _calculate_weighted_diff_metric(
            diff_histogram, self._width * self._height)
        self._max_diff_per_channel = _max_per_band(diff_histogram)

        # Generate the whitediff image (any differing pixels show as white).
        # This is tricky, because when you convert color images to grayscale
        # or black & white in PIL, it has its own ideas about thresholds.
        # We have to force it: if a pixel has any color at all, it's a '1'.
        bands = diff_image.split()
        graydiff_image = ImageChops.lighter(
            ImageChops.lighter(bands[0], bands[1]), bands[2])
        # Map every nonzero value to the brightest legal 8-bit value
        # (VALUES_PER_BAND - 1), rather than to VALUES_PER_BAND itself, which
        # lies outside the 0..255 range of the band.
        whitediff_image = graydiff_image.point(
            lambda p: (VALUES_PER_BAND - 1) if p > 0 else 0
        ).convert('1', dither=Image.NONE)

        # Final touches on diff_image: use whitediff_image as an alpha mask.
        # Unchanged pixels are transparent; differing pixels are opaque.
        diff_image.putalpha(whitediff_image)

        # Store the diff and whitediff images generated above.
        diff_image_locator = _get_difference_locator(
            expected_image_locator=expected_image_locator,
            actual_image_locator=actual_image_locator)
        basename = diff_image_locator + image_suffix
        _save_image(diff_image, os.path.join(
            storage_root, DIFFS_SUBDIR, basename))
        _save_image(whitediff_image, os.path.join(
            storage_root, WHITEDIFFS_SUBDIR, basename))

        # The last histogram bucket of the whitediff image counts its white
        # pixels, i.e. the pixels that differ.
        self._num_pixels_differing = (
            whitediff_image.histogram()[VALUES_PER_BAND - 1])

    def get_num_pixels_differing(self):
        """Returns the absolute number of pixels that differ."""
        return self._num_pixels_differing

    def get_percent_pixels_differing(self):
        """Returns the percentage of pixels that differ, as a float between
        0 and 100 (inclusive)."""
        return ((float(self._num_pixels_differing) * 100) /
                (self._width * self._height))

    def get_weighted_diff_measure(self):
        """Returns a weighted measure of image diffs, as a float between 0
        and 100 (inclusive)."""
        return self._weighted_diff_measure

    def get_max_diff_per_channel(self):
        """Returns the maximum difference between the expected and actual
        images for each R/G/B channel, as a list."""
        return self._max_diff_per_channel

    def as_dict(self):
        """Returns a dictionary representation of this DiffRecord, as needed
        when constructing the JSON representation."""
        return {
            'numDifferingPixels': self._num_pixels_differing,
            'percentDifferingPixels': self.get_percent_pixels_differing(),
            'weightedDiffMeasure': self.get_weighted_diff_measure(),
            'maxDiffPerChannel': self._max_diff_per_channel,
        }
150
epoger@google.com9dddf6f2013-11-08 16:25:25 +0000151
class ImageDiffDB(object):
    """Calculates differences between image pairs, maintaining a database of
    them for download."""

    def __init__(self, storage_root):
        """
        Args:
          storage_root: string; root path within which the DB will store all
              of its stuff
        """
        # DiffRecords we have built so far, keyed by
        # (expected_image_locator, actual_image_locator) tuples of sanitized
        # locators.
        self._diff_dict = {}
        self._storage_root = storage_root

    def add_image_pair(self,
                       expected_image_url, expected_image_locator,
                       actual_image_url, actual_image_locator):
        """Download this pair of images (unless we already have them on local
        disk), and prepare a DiffRecord for them.

        If building the DiffRecord raises, the exception is logged and no
        record is stored for this pair.

        TODO(epoger): Make this asynchronously download images, rather than
        blocking until the images have been downloaded and processed.
        When we do that, we should probably add a new method that will block
        until all of the images have been downloaded and processed.
        Otherwise, we won't know when it's safe to start calling
        get_diff_record().
        jcgregorio notes: maybe just make ImageDiffDB thread-safe and create
        a thread-pool/worker queue at a higher level that just uses
        ImageDiffDB?

        Args:
          expected_image_url: file or HTTP url from which we will download
              the expected image
          expected_image_locator: a unique ID string under which we will
              store the expected image within storage_root (probably
              including a checksum to guarantee uniqueness)
          actual_image_url: file or HTTP url from which we will download the
              actual image
          actual_image_locator: a unique ID string under which we will store
              the actual image within storage_root (probably including a
              checksum to guarantee uniqueness)
        """
        expected_id = _sanitize_locator(expected_image_locator)
        actual_id = _sanitize_locator(actual_image_locator)
        pair = (expected_id, actual_id)

        # Nothing to do if this pair has already been processed.
        if pair in self._diff_dict:
            return

        try:
            record = DiffRecord(
                self._storage_root,
                expected_image_url=expected_image_url,
                expected_image_locator=expected_id,
                actual_image_url=actual_image_url,
                actual_image_locator=actual_id)
        except Exception:
            logging.exception('got exception while creating new DiffRecord')
        else:
            self._diff_dict[pair] = record

    def get_diff_record(self, expected_image_locator, actual_image_locator):
        """Returns the DiffRecord for this image pair.

        Raises a KeyError if we don't have a DiffRecord for this image pair.
        """
        expected_id = _sanitize_locator(expected_image_locator)
        actual_id = _sanitize_locator(actual_image_locator)
        return self._diff_dict[(expected_id, actual_id)]
217
218
219# Utility functions
220
def _calculate_weighted_diff_metric(histogram, num_pixels):
    """Given the histogram of a diff image (per-channel diff at each
    pixel between two images), calculate the weighted diff metric (a
    stab at how different the two images really are).

    Args:
      histogram: PIL histogram of a per-channel diff between two images;
          its length must be a multiple of VALUES_PER_BAND
      num_pixels: integer; the total number of pixels in the diff image

    Returns: a weighted diff metric, as a float between 0 and 100
        (inclusive).

    Raises:
      ZeroDivisionError: if num_pixels is 0 (or the histogram is empty).
    """
    # TODO(epoger): As a wild guess at an appropriate metric, weight each
    # different pixel by the square of its delta value.  (The more different
    # a pixel is from its expectation, the more we care about it.)
    # In the long term, we will probably use some metric generated by
    # skpdiff anyway.
    assert len(histogram) % VALUES_PER_BAND == 0
    # Floor division keeps num_bands an integer regardless of whether "/"
    # means true division in the running interpreter.
    num_bands = len(histogram) // VALUES_PER_BAND
    # Worst case: every channel of every pixel differs by the maximum delta.
    max_diff = num_pixels * num_bands * (VALUES_PER_BAND - 1) ** 2
    # index % VALUES_PER_BAND is the delta value that a bucket stands for,
    # whichever band the bucket belongs to.
    total_diff = sum(
        count * (index % VALUES_PER_BAND) ** 2
        for index, count in enumerate(histogram))
    return float(100 * total_diff) / max_diff
244
def _max_per_band(histogram):
    """Given the histogram of an image, return the maximum value of each band
    (a.k.a. "color channel", such as R/G/B) across the entire image.

    Args:
      histogram: PIL histogram; its length must be a multiple of
          VALUES_PER_BAND

    Returns the maximum value of each band within the image histogram, as a
    list with exactly one entry per band.  A band whose buckets are all
    empty (e.g. the histogram of an image without pixels) is reported as 0,
    so that the list positions always line up with the bands.
    """
    assert len(histogram) % VALUES_PER_BAND == 0
    # Floor division keeps num_bands an integer regardless of whether "/"
    # means true division in the running interpreter.
    num_bands = len(histogram) // VALUES_PER_BAND
    max_per_band = []
    for band in range(num_bands):
        # Assuming that VALUES_PER_BAND is 256...
        # the 'R' band makes up indices 0-255 in the histogram,
        # the 'G' band makes up indices 256-511 in the histogram,
        # etc.
        min_index = band * VALUES_PER_BAND
        # Walk down from the highest value; the first populated bucket we
        # meet is the band's maximum.
        for value in range(VALUES_PER_BAND - 1, -1, -1):
            if histogram[min_index + value] > 0:
                max_per_band.append(value)
                break
        else:
            # No populated bucket at all in this band.
            max_per_band.append(0)
    return max_per_band
epoger@google.com9dddf6f2013-11-08 16:25:25 +0000270
def _generate_image_diff(image1, image2):
    """Wrapper for ImageChops.difference(image1, image2) that will handle
    some errors automatically, or at least yield more useful error messages.

    Both images are converted to RGB before they are compared.

    TODO(epoger): Currently, some of the images generated by the bots are
    RGBA and others are RGB.  I'm not sure why that is.  For now, to avoid
    confusion within the UI, convert all to RGB when diffing.

    Args:
      image1: a PIL image object
      image2: a PIL image object

    Returns: per-pixel diffs between image1 and image2, as a PIL image object

    Raises:
      ValueError: re-raised, after logging both images, if the conversion or
          the comparison raises it.
    """
    try:
        rgb_image1 = image1.convert('RGB')
        rgb_image2 = image2.convert('RGB')
        return ImageChops.difference(rgb_image1, rgb_image2)
    except ValueError:
        message = 'Error diffing image1 [%s] and image2 [%s].' % (
            repr(image1), repr(image2))
        logging.error(message)
        raise
291
def _download_and_open_image(local_filepath, url):
    """Open the image at local_filepath; if there is no file at that path,
    download it from url to that path and then open it.

    If the download fails, or the freshly downloaded file cannot be opened
    as an image, the file is removed again before the exception propagates.
    Otherwise the leftover file would be mistaken for a good local copy on
    every later call, and the image would never be downloaded again.

    Args:
      local_filepath: path on local disk where the image should be stored
      url: URL from which we can download the image if we don't have it yet

    Returns: a PIL image object
    """
    if os.path.exists(local_filepath):
        return _open_image(local_filepath)

    _mkdir_unless_exists(os.path.dirname(local_filepath))
    try:
        with contextlib.closing(urllib.urlopen(url)) as url_handle:
            with open(local_filepath, 'wb') as file_handle:
                shutil.copyfileobj(fsrc=url_handle, fdst=file_handle)
        return _open_image(local_filepath)
    except Exception:
        # Don't leave a truncated or non-image file behind.
        if os.path.exists(local_filepath):
            os.remove(local_filepath)
        raise
308
def _open_image(filepath):
    """Wrapper for Image.open(filepath) that yields more useful error
    messages.

    Args:
      filepath: path on local disk to load image from

    Returns: a PIL image object

    Raises:
      IOError: re-raised, after logging the offending path, if Image.open()
          raises it.
    """
    try:
        image = Image.open(filepath)
    except IOError:
        logging.error('IOError loading image file %s' % filepath)
        raise
    return image
322
def _save_image(image, filepath, format='PNG'):
    """Write an image to disk, creating any intermediate directories as
    needed.

    Args:
      image: a PIL image object
      filepath: path on local disk to write image to
      format: one of the PIL image formats, listed at
          http://effbot.org/imagingbook/formats.htm
    """
    # Make sure the destination directory is there before PIL writes to it.
    parent_dir = os.path.dirname(filepath)
    _mkdir_unless_exists(parent_dir)
    image.save(filepath, format)
334
epoger@google.com9dddf6f2013-11-08 16:25:25 +0000335def _mkdir_unless_exists(path):
336 """Unless path refers to an already-existing directory, create it.
337
338 Args:
339 path: path on local disk
340 """
341 if not os.path.isdir(path):
342 os.makedirs(path)
343
def _sanitize_locator(locator):
    """Returns a sanitized version of a locator (one in which we know none of
    the characters will have special meaning in filenames).

    Every character matched by DISALLOWED_FILEPATH_CHAR_REGEX is replaced
    with an underscore.

    Args:
      locator: string, or something that can be represented as a string
    """
    locator_string = str(locator)
    return DISALLOWED_FILEPATH_CHAR_REGEX.sub('_', locator_string)
352
def _get_difference_locator(expected_image_locator, actual_image_locator):
    """Returns the locator string used to look up the diffs between
    expected_image and actual_image.

    Args:
      expected_image_locator: locator string pointing at expected image
      actual_image_locator: locator string pointing at actual image

    Returns: already-sanitized locator where the diffs between expected and
        actual images can be found
    """
    sanitized_expected = _sanitize_locator(expected_image_locator)
    sanitized_actual = _sanitize_locator(actual_image_locator)
    return "%s-vs-%s" % (sanitized_expected, sanitized_actual)