blob: 3a2ce63b954f6244454e76ddf085e4fca66ef643 [file] [log] [blame]
#!/usr/bin/python

"""
Copyright 2013 Google Inc.

Use of this source code is governed by a BSD-style license that can be
found in the LICENSE file.

Calculate differences between image pairs, and store them in a database.
"""
11
12import contextlib
13import logging
14import os
15import shutil
16import urllib
17try:
18 from PIL import Image, ImageChops
19except ImportError:
20 raise ImportError('Requires PIL to be installed; see '
21 + 'http://www.pythonware.com/products/pil/')
22
# Defaults used by DiffRecord for locating/storing the input images:
# the filename suffix appended to each image locator, and the subdirectory
# (under storage_root) in which expected/actual images are kept.
DEFAULT_IMAGE_SUFFIX = '.png'
DEFAULT_IMAGES_SUBDIR = 'images'

# Subdirectories (under storage_root) into which DiffRecord writes the
# per-pixel diff images and the white-on-black "any difference" masks.
DIFFS_SUBDIR = 'diffs'
WHITEDIFFS_SUBDIR = 'whitediffs'

# Number of histogram entries PIL reports for each band (color channel).
VALUES_PER_BAND = 256
30
epoger@google.com9dddf6f2013-11-08 16:25:25 +000031
class DiffRecord(object):
    """Record of differences between two images."""

    def __init__(self, storage_root,
                 expected_image_url, expected_image_locator,
                 actual_image_url, actual_image_locator,
                 expected_images_subdir=DEFAULT_IMAGES_SUBDIR,
                 actual_images_subdir=DEFAULT_IMAGES_SUBDIR,
                 image_suffix=DEFAULT_IMAGE_SUFFIX):
        """Download this pair of images (unless we already have them on
        local disk), and prepare a DiffRecord for them.

        As a side effect, writes a diff image and a whitediff image for the
        pair under storage_root (in DIFFS_SUBDIR and WHITEDIFFS_SUBDIR).

        TODO(epoger): Make this asynchronously download images, rather than
        blocking until the images have been downloaded and processed.

        Args:
          storage_root: root directory on local disk within which we store
              all images
          expected_image_url: file or HTTP url from which we will download
              the expected image
          expected_image_locator: a unique ID string under which we will
              store the expected image within storage_root (probably
              including a checksum to guarantee uniqueness)
          actual_image_url: file or HTTP url from which we will download the
              actual image
          actual_image_locator: a unique ID string under which we will store
              the actual image within storage_root (probably including a
              checksum to guarantee uniqueness)
          expected_images_subdir: the subdirectory expected images are
              stored in.
          actual_images_subdir: the subdirectory actual images are stored in.
          image_suffix: the suffix of images.
        """
        # Download the expected/actual images, if we don't have them already.
        # TODO(rmistry): Add a parameter that makes _download_and_open_image
        # raise an exception if images are not found locally (instead of
        # trying to download them).
        expected_image = _download_and_open_image(
            os.path.join(storage_root, expected_images_subdir,
                         str(expected_image_locator) + image_suffix),
            expected_image_url)
        actual_image = _download_and_open_image(
            os.path.join(storage_root, actual_images_subdir,
                         str(actual_image_locator) + image_suffix),
            actual_image_url)

        # Generate the diff image (absolute diff at each pixel) and derive
        # the histogram-based metrics from it. The histogram must be taken
        # here, before the alpha band is added to diff_image below.
        diff_image = _generate_image_diff(actual_image, expected_image)
        diff_histogram = diff_image.histogram()
        (self._width, self._height) = diff_image.size
        self._weighted_diff_measure = _calculate_weighted_diff_metric(
            diff_histogram, self._width * self._height)
        self._max_diff_per_channel = _max_per_band(diff_histogram)

        # Generate the whitediff image (any differing pixels show as white).
        # This is tricky, because when you convert color images to grayscale
        # or black & white in PIL, it has its own ideas about thresholds.
        # We have to force it: if a pixel has any color at all, it's a '1'.
        bands = diff_image.split()
        graydiff_image = ImageChops.lighter(
            ImageChops.lighter(bands[0], bands[1]), bands[2])
        # Map every nonzero value to the brightest in-range value, so the
        # result does not depend on how PIL treats an out-of-range entry.
        white = VALUES_PER_BAND - 1
        whitediff_image = (
            graydiff_image.point(lambda p: white if p > 0 else 0)
            .convert('1', dither=Image.NONE))

        # Final touches on diff_image: use whitediff_image as an alpha mask.
        # Unchanged pixels are transparent; differing pixels are opaque.
        diff_image.putalpha(whitediff_image)

        # Store the diff and whitediff images generated above.
        diff_image_locator = _get_difference_locator(
            expected_image_locator=expected_image_locator,
            actual_image_locator=actual_image_locator)
        basename = str(diff_image_locator) + image_suffix
        _save_image(diff_image, os.path.join(
            storage_root, DIFFS_SUBDIR, basename))
        _save_image(whitediff_image, os.path.join(
            storage_root, WHITEDIFFS_SUBDIR, basename))

        # The last histogram entry of the whitediff image counts its white
        # pixels, i.e. the pixels that differ in at least one channel.
        self._num_pixels_differing = (
            whitediff_image.histogram()[VALUES_PER_BAND - 1])

    def get_num_pixels_differing(self):
        """Returns the absolute number of pixels that differ."""
        return self._num_pixels_differing

    def get_percent_pixels_differing(self):
        """Returns the percentage of pixels that differ, as a float between
        0 and 100 (inclusive)."""
        return ((float(self._num_pixels_differing) * 100) /
                (self._width * self._height))

    def get_weighted_diff_measure(self):
        """Returns a weighted measure of image diffs, as a float between 0
        and 100 (inclusive)."""
        return self._weighted_diff_measure

    def get_max_diff_per_channel(self):
        """Returns the maximum difference between the expected and actual
        images for each R/G/B channel, as a list."""
        return self._max_diff_per_channel
134
epoger@google.com9dddf6f2013-11-08 16:25:25 +0000135
class ImageDiffDB(object):
    """Calculates differences between image pairs, maintaining a database of
    them for download."""

    def __init__(self, storage_root):
        """
        Args:
          storage_root: string; root path within which the DB will store all
              of its stuff
        """
        self._storage_root = storage_root

        # Dictionary of DiffRecords, keyed by (expected_image_locator,
        # actual_image_locator) tuples.
        self._diff_dict = {}

    def add_image_pair(self,
                       expected_image_url, expected_image_locator,
                       actual_image_url, actual_image_locator):
        """Download this pair of images (unless we already have them on
        local disk), and prepare a DiffRecord for them.

        If a DiffRecord already exists for this pair of locators, this is a
        no-op. If creating the DiffRecord fails for any reason, the exception
        is logged and no record is stored (a later get_diff_record() call for
        the pair will raise KeyError).

        TODO(epoger): Make this asynchronously download images, rather than
        blocking until the images have been downloaded and processed.
        When we do that, we should probably add a new method that will block
        until all of the images have been downloaded and processed.
        Otherwise, we won't know when it's safe to start calling
        get_diff_record().
        jcgregorio notes: maybe just make ImageDiffDB thread-safe and create
        a thread-pool/worker queue at a higher level that just uses
        ImageDiffDB?

        Args:
          expected_image_url: file or HTTP url from which we will download
              the expected image
          expected_image_locator: a unique ID string under which we will
              store the expected image within storage_root (probably
              including a checksum to guarantee uniqueness)
          actual_image_url: file or HTTP url from which we will download the
              actual image
          actual_image_locator: a unique ID string under which we will store
              the actual image within storage_root (probably including a
              checksum to guarantee uniqueness)
        """
        key = (expected_image_locator, actual_image_locator)
        if key in self._diff_dict:
            return
        try:
            new_diff_record = DiffRecord(
                self._storage_root,
                expected_image_url=expected_image_url,
                expected_image_locator=expected_image_locator,
                actual_image_url=actual_image_url,
                actual_image_locator=actual_image_locator)
        except Exception:
            # Deliberately best-effort: one bad image pair must not abort
            # processing of the others.
            logging.exception('got exception while creating new DiffRecord')
            return
        self._diff_dict[key] = new_diff_record

    def get_diff_record(self, expected_image_locator, actual_image_locator):
        """Returns the DiffRecord for this image pair.

        Raises a KeyError if we don't have a DiffRecord for this image pair.
        """
        key = (expected_image_locator, actual_image_locator)
        return self._diff_dict[key]
198
199
200# Utility functions
201
def _calculate_weighted_diff_metric(histogram, num_pixels):
    """Given the histogram of a diff image (per-channel diff at each
    pixel between two images), calculate the weighted diff metric (a
    stab at how different the two images really are).

    Args:
      histogram: PIL histogram of a per-channel diff between two images;
          its length must be a multiple of VALUES_PER_BAND
      num_pixels: integer; the total number of pixels in the diff image

    Returns: a weighted diff metric, as a float between 0 and 100
        (inclusive).
    """
    # TODO(epoger): As a wild guess at an appropriate metric, weight each
    # different pixel by the square of its delta value. (The more different
    # a pixel is from its expectation, the more we care about it.)
    # In the long term, we will probably use some metric generated by
    # skpdiff anyway.
    assert len(histogram) % VALUES_PER_BAND == 0
    # Floor division keeps the band count an integer regardless of which
    # division semantics are in effect.
    num_bands = len(histogram) // VALUES_PER_BAND
    # Worst case: every band of every pixel differs by the maximum delta.
    max_diff = num_pixels * num_bands * (VALUES_PER_BAND - 1)**2
    # index % VALUES_PER_BAND recovers the delta value within its band.
    total_diff = sum(count * (index % VALUES_PER_BAND)**2
                     for index, count in enumerate(histogram))
    return float(100 * total_diff) / max_diff
225
def _max_per_band(histogram):
    """Given the histogram of an image, return the maximum value of each band
    (a.k.a. "color channel", such as R/G/B) across the entire image.

    Args:
      histogram: PIL histogram; its length must be a multiple of
          VALUES_PER_BAND

    Returns the maximum value of each band within the image histogram, as a
    list. A band whose histogram entries are all zero contributes no entry.
    """
    max_per_band = []
    assert len(histogram) % VALUES_PER_BAND == 0
    # Floor division keeps the band count an integer regardless of which
    # division semantics are in effect.
    num_bands = len(histogram) // VALUES_PER_BAND
    for band in range(num_bands):
        # Assuming that VALUES_PER_BAND is 256...
        #  the 'R' band makes up indices 0-255 in the histogram,
        #  the 'G' band makes up indices 256-511 in the histogram,
        #  etc.
        min_index = band * VALUES_PER_BAND
        # Scan from the highest value down; the first populated histogram
        # bucket is the band's maximum.
        for value in range(VALUES_PER_BAND - 1, -1, -1):
            if histogram[min_index + value] > 0:
                max_per_band.append(value)
                break
    return max_per_band
epoger@google.com9dddf6f2013-11-08 16:25:25 +0000251
def _generate_image_diff(image1, image2):
    """Wrapper for ImageChops.difference(image1, image2) that will handle some
    errors automatically, or at least yield more useful error messages.

    TODO(epoger): Currently, some of the images generated by the bots are RGBA
    and others are RGB.  I'm not sure why that is.  For now, to avoid
    confusion within the UI, convert all to RGB when diffing.

    Args:
      image1: a PIL image object
      image2: a PIL image object

    Returns: per-pixel diffs between image1 and image2, as a PIL image object

    Raises: ValueError (after logging both images) if the diff fails with
        one.
    """
    try:
        return ImageChops.difference(
            image1.convert('RGB'), image2.convert('RGB'))
    except ValueError:
        # Log which images were involved, then let the caller handle it.
        logging.error('Error diffing image1 [%s] and image2 [%s].',
                      repr(image1), repr(image2))
        raise
272
def _download_and_open_image(local_filepath, url):
    """Open the image at local_filepath; if there is no file at that path,
    download it from url to that path and then open it.

    The download is written to a temporary '.partial' file next to
    local_filepath and only renamed into place once it has completed, so an
    interrupted download is never mistaken for a finished image on a later
    call.

    Args:
      local_filepath: path on local disk where the image should be stored
      url: URL from which we can download the image if we don't have it yet

    Returns: a PIL image object
    """
    if not os.path.exists(local_filepath):
        # urlopen lives in different modules depending on the Python version.
        try:
            from urllib.request import urlopen
        except ImportError:
            from urllib import urlopen

        _mkdir_unless_exists(os.path.dirname(local_filepath))
        partial_filepath = local_filepath + '.partial'
        try:
            with contextlib.closing(urlopen(url)) as url_handle:
                with open(partial_filepath, 'wb') as file_handle:
                    shutil.copyfileobj(fsrc=url_handle, fdst=file_handle)
            os.rename(partial_filepath, local_filepath)
        except Exception:
            # Don't leave a truncated download lying around.
            if os.path.exists(partial_filepath):
                os.remove(partial_filepath)
            raise
    return _open_image(local_filepath)
289
def _open_image(filepath):
    """Wrapper for Image.open(filepath) that yields more useful error messages.

    Args:
      filepath: path on local disk to load image from

    Returns: a PIL image object

    Raises: IOError (after logging the offending path) if Image.open fails
        with one.
    """
    try:
        return Image.open(filepath)
    except IOError:
        # Log which file was at fault, then let the caller handle it.
        logging.error('IOError loading image file %s', filepath)
        raise
303
def _save_image(image, filepath, format='PNG'):
    """Write an image to disk, creating any intermediate directories as needed.

    Args:
      image: a PIL image object
      filepath: path on local disk to write image to
      format: one of the PIL image formats, listed at
          http://effbot.org/imagingbook/formats.htm
    """
    parent_dir = os.path.dirname(filepath)
    _mkdir_unless_exists(parent_dir)
    image.save(filepath, format)
315
epoger@google.com9dddf6f2013-11-08 16:25:25 +0000316def _mkdir_unless_exists(path):
317 """Unless path refers to an already-existing directory, create it.
318
319 Args:
320 path: path on local disk
321 """
322 if not os.path.isdir(path):
323 os.makedirs(path)
324
325def _get_difference_locator(expected_image_locator, actual_image_locator):
326 """Returns the locator string used to look up the diffs between expected_image
327 and actual_image.
328
329 Args:
330 expected_image_locator: locator string pointing at expected image
331 actual_image_locator: locator string pointing at actual image
332
333 Returns: locator where the diffs between expected and actual images can be
334 found
335 """
336 return "%s-vs-%s" % (expected_image_locator, actual_image_locator)