blob: f604cfb8c3b79b9c82a0ee68657acbc3ac5e10a7 [file] [log] [blame]
epoger@google.com9dddf6f2013-11-08 16:25:25 +00001#!/usr/bin/python
2
3"""
4Copyright 2013 Google Inc.
5
6Use of this source code is governed by a BSD-style license that can be
7found in the LICENSE file.
8
Calculate differences between image pairs, and store them in a database.
10"""
11
12import contextlib
13import logging
14import os
15import shutil
16import urllib
17try:
18 from PIL import Image, ImageChops
19except ImportError:
20 raise ImportError('Requires PIL to be installed; see '
21 + 'http://www.pythonware.com/products/pil/')
22
# Filename suffix appended to every image locator and diff locator when
# building on-disk filenames.
IMAGE_SUFFIX = '.png'

# Subdirectories of storage_root holding, respectively: the downloaded
# expected/actual images, the per-pixel diff images, and the "whitediff"
# images (any differing pixel shown as white).
IMAGES_SUBDIR = 'images'
DIFFS_SUBDIR = 'diffs'
WHITEDIFFS_SUBDIR = 'whitediffs'

# Number of histogram entries per band; used to split a PIL histogram into
# its per-band slices.
VALUES_PER_BAND = 256
30
epoger@google.com9dddf6f2013-11-08 16:25:25 +000031
class DiffRecord(object):
  """Difference metrics computed for one expected/actual image pair."""

  def __init__(self, storage_root,
               expected_image_url, expected_image_locator,
               actual_image_url, actual_image_locator):
    """Fetch both images (reusing any copy already on local disk), diff them,
    write the diff and whitediff images to disk, and record the metrics.

    TODO(epoger): Make this asynchronously download images, rather than blocking
    until the images have been downloaded and processed.

    Args:
      storage_root: root directory on local disk within which we store all
          images
      expected_image_url: file or HTTP url from which we will download the
          expected image
      expected_image_locator: a unique ID string under which we will store the
          expected image within storage_root (probably including a checksum to
          guarantee uniqueness)
      actual_image_url: file or HTTP url from which we will download the
          actual image
      actual_image_locator: a unique ID string under which we will store the
          actual image within storage_root (probably including a checksum to
          guarantee uniqueness)
    """
    # Fetch the expected image first, then the actual one; each is only
    # downloaded when it is not already present under IMAGES_SUBDIR.
    images_dir = os.path.join(storage_root, IMAGES_SUBDIR)
    expected_path = os.path.join(
        images_dir, str(expected_image_locator) + IMAGE_SUFFIX)
    expected_image = _download_and_open_image(expected_path,
                                              expected_image_url)
    actual_path = os.path.join(
        images_dir, str(actual_image_locator) + IMAGE_SUFFIX)
    actual_image = _download_and_open_image(actual_path, actual_image_url)

    # Per-pixel absolute diff. The metrics below are taken from its histogram
    # BEFORE the alpha band is attached further down.
    pixel_diffs = _generate_image_diff(actual_image, expected_image)
    channel_histogram = pixel_diffs.histogram()
    width, height = pixel_diffs.size
    self._weighted_diff_measure = _calculate_weighted_diff_metric(
        channel_histogram, width * height)
    self._max_diff_per_channel = _max_per_band(channel_histogram)

    # Build the whitediff image (any differing pixel shows as white).
    # PIL applies its own thresholds when converting to grayscale or black &
    # white, so force the issue: take the per-pixel maximum over the three
    # bands, then map every nonzero value to the top of the range.
    channels = pixel_diffs.split()
    brightest = ImageChops.lighter(channels[0], channels[1])
    brightest = ImageChops.lighter(brightest, channels[2])
    thresholded = brightest.point(
        lambda value: VALUES_PER_BAND if value > 0 else 0)
    whitediff_image = thresholded.convert('1', dither=Image.NONE)

    # Use the whitediff as the diff image's alpha mask: unchanged pixels
    # become transparent, differing pixels stay opaque.
    pixel_diffs.putalpha(whitediff_image)

    # Write both generated images under the same basename.
    diff_locator = _get_difference_locator(
        expected_image_locator=expected_image_locator,
        actual_image_locator=actual_image_locator)
    diff_filename = str(diff_locator) + IMAGE_SUFFIX
    _save_image(pixel_diffs,
                os.path.join(storage_root, DIFFS_SUBDIR, diff_filename))
    _save_image(whitediff_image,
                os.path.join(storage_root, WHITEDIFFS_SUBDIR, diff_filename))

    # Remaining metrics: image dimensions, and the count of white pixels in
    # the whitediff (the last histogram bucket).
    self._width, self._height = pixel_diffs.size
    white_bucket = VALUES_PER_BAND - 1
    self._num_pixels_differing = whitediff_image.histogram()[white_bucket]

  def get_num_pixels_differing(self):
    """Returns the absolute number of pixels that differ."""
    return self._num_pixels_differing

  def get_percent_pixels_differing(self):
    """Returns the percentage of pixels that differ, as a float between
    0 and 100 (inclusive)."""
    scaled_count = float(self._num_pixels_differing) * 100
    total_pixels = self._width * self._height
    return scaled_count / total_pixels

  def get_weighted_diff_measure(self):
    """Returns a weighted measure of image diffs, as a float between 0 and 100
    (inclusive)."""
    return self._weighted_diff_measure

  def get_max_diff_per_channel(self):
    """Returns the maximum difference between the expected and actual images
    for each R/G/B channel, as a list."""
    return self._max_diff_per_channel
125
epoger@google.com9dddf6f2013-11-08 16:25:25 +0000126
class ImageDiffDB(object):
  """Calculates differences between image pairs, maintaining a database of
  them for download."""

  def __init__(self, storage_root):
    """
    Args:
      storage_root: string; root path within which the DB will store all of
          its stuff
    """
    self._storage_root = storage_root

    # DiffRecords computed so far, keyed by
    # (expected_image_locator, actual_image_locator) tuples.
    self._diff_dict = {}

  def add_image_pair(self,
                     expected_image_url, expected_image_locator,
                     actual_image_url, actual_image_locator):
    """Download this pair of images (unless we already have them on local disk),
    and prepare a DiffRecord for them.

    A pair that already has a DiffRecord is left untouched. If building the
    DiffRecord raises, the exception is logged and no record is stored, so a
    later get_diff_record() for this pair raises KeyError.

    TODO(epoger): Make this asynchronously download images, rather than blocking
    until the images have been downloaded and processed.
    When we do that, we should probably add a new method that will block
    until all of the images have been downloaded and processed.  Otherwise,
    we won't know when it's safe to start calling get_diff_record().
    jcgregorio notes: maybe just make ImageDiffDB thread-safe and create a
    thread-pool/worker queue at a higher level that just uses ImageDiffDB?

    Args:
      expected_image_url: file or HTTP url from which we will download the
          expected image
      expected_image_locator: a unique ID string under which we will store the
          expected image within storage_root (probably including a checksum to
          guarantee uniqueness)
      actual_image_url: file or HTTP url from which we will download the
          actual image
      actual_image_locator: a unique ID string under which we will store the
          actual image within storage_root (probably including a checksum to
          guarantee uniqueness)
    """
    pair_key = (expected_image_locator, actual_image_locator)
    if pair_key in self._diff_dict:
      return

    try:
      record = DiffRecord(
          self._storage_root,
          expected_image_url=expected_image_url,
          expected_image_locator=expected_image_locator,
          actual_image_url=actual_image_url,
          actual_image_locator=actual_image_locator)
    except Exception:
      # Best effort: one bad image pair must not abort the caller.
      logging.exception('got exception while creating new DiffRecord')
    else:
      self._diff_dict[pair_key] = record

  def get_diff_record(self, expected_image_locator, actual_image_locator):
    """Returns the DiffRecord for this image pair.

    Raises a KeyError if we don't have a DiffRecord for this image pair.
    """
    return self._diff_dict[(expected_image_locator, actual_image_locator)]
189
190
191# Utility functions
192
def _calculate_weighted_diff_metric(histogram, num_pixels):
  """Given the histogram of a diff image (per-channel diff at each
  pixel between two images), calculate the weighted diff metric (a
  stab at how different the two images really are).

  Args:
    histogram: PIL histogram of a per-channel diff between two images; its
        length must be a multiple of VALUES_PER_BAND
    num_pixels: integer; the total number of pixels in the diff image

  Returns: a weighted diff metric, as a float between 0 and 100 (inclusive).

  Raises:
    ZeroDivisionError: if num_pixels is 0 or the histogram is empty.
  """
  # TODO(epoger): As a wild guess at an appropriate metric, weight each
  # different pixel by the square of its delta value.  (The more different
  # a pixel is from its expectation, the more we care about it.)
  # In the long term, we will probably use some metric generated by
  # skpdiff anyway.
  assert(len(histogram) % VALUES_PER_BAND == 0)
  # Floor division keeps the band count an integer on Python 2 and 3 alike.
  num_bands = len(histogram) // VALUES_PER_BAND
  # Worst case: every pixel differs by the maximum delta in every band.
  max_diff = num_pixels * num_bands * (VALUES_PER_BAND - 1)**2
  # The bands are concatenated in the histogram, so an entry's delta value is
  # its index modulo VALUES_PER_BAND.
  total_diff = sum(count * (index % VALUES_PER_BAND)**2
                   for index, count in enumerate(histogram))
  return float(100 * total_diff) / max_diff
216
def _max_per_band(histogram):
  """Given the histogram of an image, return the maximum value of each band
  (a.k.a. "color channel", such as R/G/B) across the entire image.

  Args:
    histogram: PIL histogram; its length must be a multiple of VALUES_PER_BAND

  Returns the maximum value of each band within the image histogram, as a list.
  A band whose histogram slice is all zeros contributes no entry to the list.
  """
  max_per_band = []
  assert(len(histogram) % VALUES_PER_BAND == 0)
  # Floor division keeps the band count an integer on Python 2 and 3 alike.
  num_bands = len(histogram) // VALUES_PER_BAND
  for band in range(num_bands):
    # Assuming that VALUES_PER_BAND is 256...
    # the 'R' band makes up indices 0-255 in the histogram,
    # the 'G' band makes up indices 256-511 in the histogram,
    # etc.
    min_index = band * VALUES_PER_BAND
    # Scan from the highest value down; the first populated bucket is the
    # band's maximum.
    for value in range(VALUES_PER_BAND - 1, -1, -1):
      if histogram[min_index + value] > 0:
        max_per_band.append(value)
        break
  return max_per_band
epoger@google.com9dddf6f2013-11-08 16:25:25 +0000242
def _generate_image_diff(image1, image2):
  """Diff two images with ImageChops.difference() after converting both to
  RGB, logging the offending images if a ValueError is raised.

  TODO(epoger): Currently, some of the images generated by the bots are RGBA
  and others are RGB.  I'm not sure why that is.  For now, to avoid confusion
  within the UI, convert all to RGB when diffing.

  Args:
    image1: a PIL image object
    image2: a PIL image object

  Returns: per-pixel diffs between image1 and image2, as a PIL image object

  Raises:
    ValueError: re-raised, after logging, if conversion or diffing raises it.
  """
  try:
    rgb_image1 = image1.convert('RGB')
    rgb_image2 = image2.convert('RGB')
    return ImageChops.difference(rgb_image1, rgb_image2)
  except ValueError:
    image_reprs = (repr(image1), repr(image2))
    logging.error(
        'Error diffing image1 [%s] and image2 [%s].' % image_reprs)
    raise
263
def _download_and_open_image(local_filepath, url):
  """Open the image at local_filepath; if there is no file at that path,
  download it from url to that path and then open it.

  If the download fails partway through, the partially written file is removed
  before the exception propagates, so a later call retries the download
  instead of treating a truncated file as a cached image.

  Args:
    local_filepath: path on local disk where the image should be stored
    url: URL from which we can download the image if we don't have it yet

  Returns: a PIL image object
  """
  if not os.path.exists(local_filepath):
    _mkdir_unless_exists(os.path.dirname(local_filepath))
    download_complete = False
    try:
      with contextlib.closing(urllib.urlopen(url)) as url_handle:
        with open(local_filepath, 'wb') as file_handle:
          shutil.copyfileobj(fsrc=url_handle, fdst=file_handle)
      download_complete = True
    finally:
      # The os.path.exists() check above is the cache test, so a truncated
      # file left behind here would be reused forever.
      if not download_complete and os.path.exists(local_filepath):
        os.remove(local_filepath)
  return _open_image(local_filepath)
280
def _open_image(filepath):
  """Open an image with Image.open(filepath), logging the path on failure.

  Args:
    filepath: path on local disk to load image from

  Returns: a PIL image object

  Raises:
    IOError: re-raised, after logging, if Image.open() raises it.
  """
  try:
    image = Image.open(filepath)
  except IOError:
    logging.error('IOError loading image file %s' % filepath)
    raise
  return image
294
def _save_image(image, filepath, format='PNG'):
  """Save an image to filepath, first creating the containing directory (and
  any missing parents) if needed.

  Args:
    image: a PIL image object
    filepath: path on local disk to write image to
    format: one of the PIL image formats, listed at
        http://effbot.org/imagingbook/formats.htm
  """
  parent_dir = os.path.dirname(filepath)
  _mkdir_unless_exists(parent_dir)
  image.save(filepath, format)
306
def _mkdir_unless_exists(path):
  """Unless path refers to an already-existing directory, create it (along
  with any missing intermediate directories).

  Args:
    path: path on local disk

  Raises:
    OSError: if the directory cannot be created and does not already exist
        (for example, path refers to an existing regular file).
  """
  if os.path.isdir(path):
    return
  try:
    os.makedirs(path)
  except OSError:
    # Another thread or process may have created the directory between the
    # isdir() check and makedirs(); that is success, anything else is not.
    if not os.path.isdir(path):
      raise
315
def _get_difference_locator(expected_image_locator, actual_image_locator):
  """Builds the locator string under which the diffs between expected_image
  and actual_image are stored.

  Args:
    expected_image_locator: locator string pointing at expected image
    actual_image_locator: locator string pointing at actual image

  Returns: locator where the diffs between expected and actual images can be
      found, of the form "<expected>-vs-<actual>"
  """
  locator_pair = (expected_image_locator, actual_image_locator)
  return "%s-vs-%s" % locator_pair