blob: 10fcc98f3b13c35dd57a7aa9550dd813edcbaf04 [file] [log] [blame]
epoger@google.com9dddf6f2013-11-08 16:25:25 +00001#!/usr/bin/python
2
3"""
4Copyright 2013 Google Inc.
5
6Use of this source code is governed by a BSD-style license that can be
7found in the LICENSE file.
8
9Calculate differences between image pairs, and store them in a database.
10"""
11
12import contextlib
commit-bot@chromium.org44546f82014-02-11 18:21:26 +000013import csv
commit-bot@chromium.org280ea822014-04-14 18:15:29 +000014import errno
epoger@google.com9dddf6f2013-11-08 16:25:25 +000015import logging
commit-bot@chromium.org280ea822014-04-14 18:15:29 +000016import Queue
epoger@google.com9dddf6f2013-11-08 16:25:25 +000017import os
commit-bot@chromium.org9985ef52014-02-10 18:19:30 +000018import re
epoger@google.com9dddf6f2013-11-08 16:25:25 +000019import shutil
commit-bot@chromium.org44546f82014-02-11 18:21:26 +000020import sys
21import tempfile
commit-bot@chromium.org280ea822014-04-14 18:15:29 +000022import time
23import threading
epoger@google.com9dddf6f2013-11-08 16:25:25 +000024import urllib
25try:
26 from PIL import Image, ImageChops
27except ImportError:
28 raise ImportError('Requires PIL to be installed; see '
29 + 'http://www.pythonware.com/products/pil/')
30
commit-bot@chromium.org44546f82014-02-11 18:21:26 +000031# Set the PYTHONPATH to include the tools directory.
32sys.path.append(
33 os.path.join(
34 os.path.dirname(os.path.realpath(__file__)), os.pardir, os.pardir,
35 'tools'))
36import find_run_binary
37
# Path to the skpdiff executable, resolved once at import time.
SKPDIFF_BINARY = find_run_binary.find_path_to_program('skpdiff')

# Defaults used by DiffRecord / ImageDiffDB when the caller does not override.
DEFAULT_IMAGE_SUFFIX = '.png'
DEFAULT_IMAGES_SUBDIR = 'images'
DEFAULT_NUM_WORKERS = 8

# Matches any character that is NOT a word character or a hyphen; used by
# _sanitize_locator() to replace such characters in locators.
# (Raw string so the backslashes reach the regex engine untouched.)
DISALLOWED_FILEPATH_CHAR_REGEX = re.compile(r'[^\w\-]')

# Subdirectories (within storage_root) where generated diff images are saved.
DIFFS_SUBDIR = 'diffs'
WHITEDIFFS_SUBDIR = 'whitediffs'

# Number of histogram entries per image band (color channel).
VALUES_PER_BAND = 256

# Keys used within DiffRecord dictionary representations.
# NOTE: Keep these in sync with static/constants.js
KEY__DIFFERENCE_DATA__MAX_DIFF_PER_CHANNEL = 'maxDiffPerChannel'
KEY__DIFFERENCE_DATA__NUM_DIFF_PIXELS = 'numDifferingPixels'
KEY__DIFFERENCE_DATA__PERCENT_DIFF_PIXELS = 'percentDifferingPixels'
KEY__DIFFERENCE_DATA__PERCEPTUAL_DIFF = 'perceptualDifference'
KEY__DIFFERENCE_DATA__WEIGHTED_DIFF = 'weightedDiffMeasure'

# Special values within ImageDiffDB._diff_dict
DIFFRECORD_FAILED = 'failed'
DIFFRECORD_PENDING = 'pending'

# TODO(epoger): Temporary(?) counter of how many times a thread finished
# downloading a file only to find that the same file had already been stored
# (see _download_and_open_image).
global_file_collisions = 0
66
epoger@google.com9dddf6f2013-11-08 16:25:25 +000067
class DiffRecord(object):
    """Record of differences between two images.

    All of the work happens in the constructor: it obtains both images,
    computes the difference metrics, and writes the diff and whitediff images
    to disk. Afterwards the instance only holds the computed numbers.
    """

    def __init__(self, storage_root,
                 expected_image_url, expected_image_locator,
                 actual_image_url, actual_image_locator,
                 expected_images_subdir=DEFAULT_IMAGES_SUBDIR,
                 actual_images_subdir=DEFAULT_IMAGES_SUBDIR,
                 image_suffix=DEFAULT_IMAGE_SUFFIX):
        """Download this pair of images (unless we already have them on local
        disk), and prepare a DiffRecord for them.

        Args:
          storage_root: root directory on local disk within which we store all
              images
          expected_image_url: file or HTTP url from which we will download the
              expected image
          expected_image_locator: a unique ID string under which we will store
              the expected image within storage_root (probably including a
              checksum to guarantee uniqueness)
          actual_image_url: file or HTTP url from which we will download the
              actual image
          actual_image_locator: a unique ID string under which we will store
              the actual image within storage_root (probably including a
              checksum to guarantee uniqueness)
          expected_images_subdir: the subdirectory expected images are stored
              in.
          actual_images_subdir: the subdirectory actual images are stored in.
          image_suffix: the suffix of images.

        Raises:
          Exception: whatever was raised while fetching/opening either image
              (logged, then re-raised), or while diffing or saving.
          ValueError: if skpdiff wrote no result rows for this image pair.
        """
        expected_image_locator = _sanitize_locator(expected_image_locator)
        actual_image_locator = _sanitize_locator(actual_image_locator)

        # Download the expected/actual images, if we don't have them already.
        # TODO(rmistry): Add a parameter that makes _download_and_open_image
        # raise an exception if images are not found locally (instead of
        # trying to download them).
        expected_image_file = os.path.join(
            storage_root, expected_images_subdir,
            str(expected_image_locator) + image_suffix)
        actual_image_file = os.path.join(
            storage_root, actual_images_subdir,
            str(actual_image_locator) + image_suffix)
        try:
            expected_image = _download_and_open_image(
                expected_image_file, expected_image_url)
        except Exception:
            logging.exception(
                'unable to download expected_image_url %s to file %s' %
                (expected_image_url, expected_image_file))
            raise
        try:
            actual_image = _download_and_open_image(
                actual_image_file, actual_image_url)
        except Exception:
            logging.exception(
                'unable to download actual_image_url %s to file %s' %
                (actual_image_url, actual_image_file))
            raise

        # Generate the diff image (absolute diff at each pixel) and
        # max_diff_per_channel. The histogram is taken BEFORE the alpha band
        # is added below, so it only covers the color bands.
        diff_image = _generate_image_diff(actual_image, expected_image)
        diff_histogram = diff_image.histogram()
        (diff_width, diff_height) = diff_image.size
        self._weighted_diff_measure = _calculate_weighted_diff_metric(
            diff_histogram, diff_width * diff_height)
        self._max_diff_per_channel = _max_per_band(diff_histogram)

        # Generate the whitediff image (any differing pixels show as white).
        # This is tricky, because when you convert color images to grayscale
        # or black & white in PIL, it has its own ideas about thresholds.
        # We have to force it: if a pixel has any color at all, it's a '1'.
        bands = diff_image.split()
        graydiff_image = ImageChops.lighter(ImageChops.lighter(
            bands[0], bands[1]), bands[2])
        whitediff_image = (
            graydiff_image.point(lambda p: p > 0 and VALUES_PER_BAND)
            .convert('1', dither=Image.NONE))

        # Calculate the perceptual difference percentage, using the image
        # files that were just downloaded/opened above.
        self._perceptual_difference = self._calculate_perceptual_difference(
            expected_image_file, actual_image_file)

        # Final touches on diff_image: use whitediff_image as an alpha mask.
        # Unchanged pixels are transparent; differing pixels are opaque.
        diff_image.putalpha(whitediff_image)

        # Store the diff and whitediff images generated above.
        diff_image_locator = _get_difference_locator(
            expected_image_locator=expected_image_locator,
            actual_image_locator=actual_image_locator)
        basename = str(diff_image_locator) + image_suffix
        _save_image(diff_image, os.path.join(
            storage_root, DIFFS_SUBDIR, basename))
        _save_image(whitediff_image, os.path.join(
            storage_root, WHITEDIFFS_SUBDIR, basename))

        # Calculate difference metrics. The last histogram bucket of the
        # whitediff image counts its white (i.e. differing) pixels.
        (self._width, self._height) = diff_image.size
        self._num_pixels_differing = (
            whitediff_image.histogram()[VALUES_PER_BAND - 1])

    @staticmethod
    def _calculate_perceptual_difference(expected_image_file,
                                         actual_image_file):
        """Runs skpdiff on two image files and returns the perceptual
        difference as a percentage (100 minus the reported similarity).

        Args:
          expected_image_file: path on local disk of the expected image
          actual_image_file: path on local disk of the actual image

        Raises:
          ValueError: if the CSV written by skpdiff contains no data rows, so
              no perceptual difference could be determined.
        """
        perceptual_difference = None
        skpdiff_csv_dir = tempfile.mkdtemp()
        try:
            skpdiff_csv_output = os.path.join(
                skpdiff_csv_dir, 'skpdiff-output.csv')
            find_run_binary.run_command(
                [SKPDIFF_BINARY, '-p', expected_image_file, actual_image_file,
                 '--csv', skpdiff_csv_output, '-d', 'perceptual'])
            with open(skpdiff_csv_output) as csv_file:
                # If there are several rows, the last one wins.
                for row in csv.DictReader(csv_file):
                    perceptual_similarity = float(row[' perceptual'].strip())
                    if not 0 <= perceptual_similarity <= 1:
                        # skpdiff outputs -1 if the images are different
                        # sizes. Treat any output that does not lie in [0, 1]
                        # as having 0% perceptual similarity.
                        perceptual_similarity = 0
                    # skpdiff returns the perceptual similarity, convert it
                    # to get the perceptual difference percentage.
                    perceptual_difference = 100 - (perceptual_similarity * 100)
        finally:
            shutil.rmtree(skpdiff_csv_dir)

        if perceptual_difference is None:
            # Fail here rather than leave the record without a perceptual
            # difference, which would only surface later as an
            # AttributeError in the getters.
            raise ValueError(
                'skpdiff produced no perceptual results for %s vs %s' %
                (expected_image_file, actual_image_file))
        return perceptual_difference

    def get_num_pixels_differing(self):
        """Returns the absolute number of pixels that differ."""
        return self._num_pixels_differing

    def get_percent_pixels_differing(self):
        """Returns the percentage of pixels that differ, as a float between
        0 and 100 (inclusive)."""
        return ((float(self._num_pixels_differing) * 100) /
                (self._width * self._height))

    def get_perceptual_difference(self):
        """Returns the perceptual difference percentage."""
        return self._perceptual_difference

    def get_weighted_diff_measure(self):
        """Returns a weighted measure of image diffs, as a float between 0 and
        100 (inclusive).

        TODO(epoger): Delete this function, now that we have perceptual diff?
        """
        return self._weighted_diff_measure

    def get_max_diff_per_channel(self):
        """Returns the maximum difference between the expected and actual
        images for each R/G/B channel, as a list."""
        return self._max_diff_per_channel

    def as_dict(self):
        """Returns a dictionary representation of this DiffRecord, as needed
        when constructing the JSON representation."""
        return {
            KEY__DIFFERENCE_DATA__NUM_DIFF_PIXELS: self._num_pixels_differing,
            KEY__DIFFERENCE_DATA__PERCENT_DIFF_PIXELS:
                self.get_percent_pixels_differing(),
            KEY__DIFFERENCE_DATA__WEIGHTED_DIFF:
                self.get_weighted_diff_measure(),
            KEY__DIFFERENCE_DATA__MAX_DIFF_PER_CHANNEL:
                self._max_diff_per_channel,
            KEY__DIFFERENCE_DATA__PERCEPTUAL_DIFF: self._perceptual_difference,
        }
226
epoger@google.com9dddf6f2013-11-08 16:25:25 +0000227
class ImageDiffDB(object):
    """Calculates differences between image pairs, maintaining a database of
    them for download.

    Results are kept in an in-memory dictionary; image pairs can be processed
    either synchronously or by a pool of daemon worker threads.
    """

    def __init__(self, storage_root, num_workers=DEFAULT_NUM_WORKERS):
        """
        Args:
          storage_root: string; root path within which the DB will store all
              of its stuff
          num_workers: integer; number of worker threads to spawn
        """
        self._storage_root = storage_root

        # Maps (expected_image_locator, actual_image_locator) tuples, as
        # built by _generate_key(), to DiffRecords. A value may also be
        # DIFFRECORD_PENDING or DIFFRECORD_FAILED (async path), or None
        # (synchronous path, when the DiffRecord could not be created).
        self._diff_dict = {}

        # Queue of (key, expected_image_url, actual_image_url) tasks consumed
        # by the worker threads started below.
        self._tasks_queue = Queue.Queue(maxsize=2*num_workers)
        self._workers = []
        for worker_num in range(num_workers):
            thread = threading.Thread(target=self.worker, args=(worker_num,))
            thread.daemon = True
            thread.start()
            self._workers.append(thread)

    def worker(self, worker_num):
        """Body of a worker thread: loops forever, pulling tasks off
        self._tasks_queue and storing each result in self._diff_dict.

        Args:
          worker_num: (integer) which worker this is
        """
        while True:
            task = self._tasks_queue.get()
            key, expected_image_url, actual_image_url = task
            try:
                outcome = DiffRecord(
                    self._storage_root,
                    expected_image_url=expected_image_url,
                    expected_image_locator=key[0],
                    actual_image_url=actual_image_url,
                    actual_image_locator=key[1])
            except Exception:
                logging.exception(
                    'exception while creating DiffRecord for key %s' %
                    str(key))
                outcome = DIFFRECORD_FAILED
            self._diff_dict[key] = outcome

    def add_image_pair(self,
                       expected_image_url, expected_image_locator,
                       actual_image_url, actual_image_locator):
        """Download this pair of images (unless we already have them on local
        disk), and prepare a DiffRecord for them.

        The DiffRecord is created in the calling thread. If there is already
        an entry for this image pair, this method does nothing.

        Any exception raised while creating the DiffRecord is logged and
        swallowed; None is stored for the pair instead, so that
        get_diff_record() returns None for it.

        Args:
          expected_image_url: file or HTTP url from which we will download the
              expected image
          expected_image_locator: a unique ID string under which we will store
              the expected image within storage_root (probably including a
              checksum to guarantee uniqueness)
          actual_image_url: file or HTTP url from which we will download the
              actual image
          actual_image_locator: a unique ID string under which we will store
              the actual image within storage_root (probably including a
              checksum to guarantee uniqueness)
        """
        key = _generate_key(expected_image_locator, actual_image_locator)
        if key in self._diff_dict:
            return

        try:
            record = DiffRecord(
                self._storage_root,
                expected_image_url=expected_image_url,
                expected_image_locator=expected_image_locator,
                actual_image_url=actual_image_url,
                actual_image_locator=actual_image_locator)
        except Exception:
            # If we can't create a real DiffRecord for this (expected, actual)
            # pair, store None and the UI will show whatever information we
            # DO have. Fixes http://skbug.com/2368 .
            logging.exception(
                'got exception while creating a DiffRecord for '
                'expected_image_url=%s , actual_image_url=%s; returning None'
                % (expected_image_url, actual_image_url))
            record = None
        self._diff_dict[key] = record

    def add_image_pair_async(self,
                             expected_image_url, expected_image_locator,
                             actual_image_url, actual_image_locator):
        """Download this pair of images (unless we already have them on local
        disk), and prepare a DiffRecord for them.

        The work is handed to the worker threads; calls to get_diff_record()
        will block until the DiffRecord is available (or we have given up on
        creating it). This method itself only waits if the task queue is
        full.

        Args:
          expected_image_url: file or HTTP url from which we will download the
              expected image
          expected_image_locator: a unique ID string under which we will store
              the expected image within storage_root (probably including a
              checksum to guarantee uniqueness)
          actual_image_url: file or HTTP url from which we will download the
              actual image
          actual_image_locator: a unique ID string under which we will store
              the actual image within storage_root (probably including a
              checksum to guarantee uniqueness)
        """
        key = _generate_key(expected_image_locator, actual_image_locator)
        if key in self._diff_dict:
            # We have already requested a diff between these two images,
            # so we don't need to request it again.
            return

        # Threading note: If multiple threads called into this method with
        # the same key at the same time, there will be multiple tasks on the
        # queue with the same key. But that's OK; they will both complete
        # successfully, and just waste a little time in the process. Nothing
        # will break.
        self._diff_dict[key] = DIFFRECORD_PENDING
        self._tasks_queue.put((key, expected_image_url, actual_image_url))

    def get_diff_record(self, expected_image_locator, actual_image_locator):
        """Returns the DiffRecord for this image pair, polling once a second
        for as long as the pair is still marked as pending.

        Args:
          expected_image_locator: a unique ID string under which we will store
              the expected image within storage_root (probably including a
              checksum to guarantee uniqueness)
          actual_image_locator: a unique ID string under which we will store
              the actual image within storage_root (probably including a
              checksum to guarantee uniqueness)

        Returns the DiffRecord for this image pair, or None if we were unable
        to generate one.

        Raises:
          KeyError: if this image pair was never added to the DB.
        """
        key = _generate_key(expected_image_locator, actual_image_locator)

        # If we have no results yet, block until we do.
        while True:
            diff_record = self._diff_dict[key]
            if not diff_record == DIFFRECORD_PENDING:
                break
            time.sleep(1)

        # Once we have the result...
        if diff_record == DIFFRECORD_FAILED:
            logging.error(
                'failed to create a DiffRecord for expected_image_locator=%s , '
                'actual_image_locator=%s' % (
                    expected_image_locator, actual_image_locator))
            return None
        return diff_record
epoger@google.com9dddf6f2013-11-08 16:25:25 +0000385
386
387# Utility functions
388
def _calculate_weighted_diff_metric(histogram, num_pixels):
    """Given the histogram of a diff image (per-channel diff at each
    pixel between two images), calculate the weighted diff metric (a
    stab at how different the two images really are).

    TODO(epoger): Delete this function, now that we have perceptual diff?

    Args:
      histogram: PIL histogram of a per-channel diff between two images;
          its length must be a multiple of VALUES_PER_BAND
      num_pixels: integer; the total number of pixels in the diff image

    Returns: a weighted diff metric, as a float between 0 and 100 (inclusive).

    Raises:
      ZeroDivisionError: if num_pixels is 0 or the histogram is empty.
    """
    # TODO(epoger): As a wild guess at an appropriate metric, weight each
    # different pixel by the square of its delta value. (The more different
    # a pixel is from its expectation, the more we care about it.)
    assert len(histogram) % VALUES_PER_BAND == 0
    # Floor division keeps the band count an integer under true division too.
    num_bands = len(histogram) // VALUES_PER_BAND
    # Largest possible total: every pixel differs maximally in every band.
    max_diff = num_pixels * num_bands * (VALUES_PER_BAND - 1)**2
    # (index % VALUES_PER_BAND) is the delta value that a bucket stands for,
    # regardless of which band the bucket belongs to.
    total_diff = sum(
        count * (index % VALUES_PER_BAND)**2
        for index, count in enumerate(histogram))
    return float(100 * total_diff) / max_diff
412
commit-bot@chromium.org16f41802014-02-26 19:05:20 +0000413
def _max_per_band(histogram):
    """Given the histogram of an image, return the maximum value of each band
    (a.k.a. "color channel", such as R/G/B) across the entire image.

    Args:
      histogram: PIL histogram; its length must be a multiple of
          VALUES_PER_BAND

    Returns the maximum value of each band within the image histogram, as a
    list with exactly one entry per band. A band whose buckets are all empty
    is reported as 0, so that list positions always line up with band indices.
    """
    assert len(histogram) % VALUES_PER_BAND == 0
    # Floor division keeps the band count an integer under true division too.
    num_bands = len(histogram) // VALUES_PER_BAND
    max_per_band = []
    for band in range(num_bands):
        # Assuming that VALUES_PER_BAND is 256...
        #  the 'R' band makes up indices 0-255 in the histogram,
        #  the 'G' band makes up indices 256-511 in the histogram,
        #  etc.
        min_index = band * VALUES_PER_BAND
        band_max = 0
        # Scan from the highest value downwards; the first non-empty bucket
        # is the maximum value present in this band.
        for value in reversed(range(VALUES_PER_BAND)):
            if histogram[min_index + value] > 0:
                band_max = value
                break
        max_per_band.append(band_max)
    return max_per_band
epoger@google.com9dddf6f2013-11-08 16:25:25 +0000439
commit-bot@chromium.org16f41802014-02-26 19:05:20 +0000440
def _generate_image_diff(image1, image2):
    """Returns ImageChops.difference() of the two images after converting
    both to RGB, logging the images involved if a ValueError occurs.

    TODO(epoger): Currently, some of the images generated by the bots are RGBA
    and others are RGB.  I'm not sure why that is.  For now, to avoid confusion
    within the UI, convert all to RGB when diffing.

    Args:
      image1: a PIL image object
      image2: a PIL image object

    Returns: per-pixel diffs between image1 and image2, as a PIL image object

    Raises:
      ValueError: re-raised, after logging, if conversion or diffing raised it.
    """
    try:
        rgb_image1 = image1.convert('RGB')
        rgb_image2 = image2.convert('RGB')
        return ImageChops.difference(rgb_image1, rgb_image2)
    except ValueError:
        description1 = repr(image1)
        description2 = repr(image2)
        logging.error('Error diffing image1 [%s] and image2 [%s].' % (
            description1, description2))
        raise
461
commit-bot@chromium.org16f41802014-02-26 19:05:20 +0000462
def _download_and_open_image(local_filepath, url):
    """Open the image at local_filepath; if there is no file at that path,
    download it from url to that path and then open it.

    Args:
      local_filepath: path on local disk where the image should be stored
      url: URL from which we can download the image if we don't have it yet

    Returns: a PIL image object
    """
    global global_file_collisions
    if not os.path.exists(local_filepath):
        _mkdir_unless_exists(os.path.dirname(local_filepath))

        # First download the file contents into a unique filename, and
        # then rename that file.  That way, if multiple threads are
        # downloading the same filename at the same time, they won't interfere
        # with each other (they will both download the file, and one will
        # "win" in the end).
        temp_filename = '%s-%d' % (local_filepath,
                                   threading.current_thread().ident)
        try:
            with contextlib.closing(urllib.urlopen(url)) as url_handle:
                with open(temp_filename, 'wb') as file_handle:
                    shutil.copyfileobj(fsrc=url_handle, fdst=file_handle)

            # Keep count of how many colliding downloads we encounter;
            # if it's a large number, we may want to change our download
            # strategy to minimize repeated downloads.
            # NOTE(review): this counter is updated without any locking, so
            # it is presumably only approximate under concurrency.
            if os.path.exists(local_filepath):
                global_file_collisions += 1
            else:
                os.rename(temp_filename, local_filepath)
        finally:
            # Don't leave the temporary file behind: it is still on disk if
            # we lost a collision (no rename happened) or if the download
            # failed part-way through. After a successful rename it no
            # longer exists and there is nothing to do.
            if os.path.exists(temp_filename):
                os.remove(temp_filename)

    return _open_image(local_filepath)
496
commit-bot@chromium.org16f41802014-02-26 19:05:20 +0000497
def _open_image(filepath):
    """Calls Image.open(filepath); if that raises IOError, logs the problem
    and deletes the file before re-raising.

    Args:
      filepath: path on local disk to load image from

    Returns: a PIL image object

    Raises:
      IOError: if the image could not be loaded from the file.
    """
    try:
        image = Image.open(filepath)
    except IOError:
        # If we are unable to load an image from the file, delete it from disk
        # and we will try to fetch it again next time.
        # Fixes http://skbug.com/2247
        logging.error(
            'IOError loading image file %s ; deleting it.' % filepath)
        os.remove(filepath)
        raise
    return image
514
commit-bot@chromium.org16f41802014-02-26 19:05:20 +0000515
def _save_image(image, filepath, format='PNG'):
    """Saves an image to filepath, first making sure that the directory which
    will contain it exists.

    Args:
      image: a PIL image object
      filepath: path on local disk to write image to
      format: one of the PIL image formats, listed at
          http://effbot.org/imagingbook/formats.htm
    """
    parent_dir = os.path.dirname(filepath)
    _mkdir_unless_exists(parent_dir)
    image.save(filepath, format)
527
commit-bot@chromium.org16f41802014-02-26 19:05:20 +0000528
epoger@google.com9dddf6f2013-11-08 16:25:25 +0000529def _mkdir_unless_exists(path):
530 """Unless path refers to an already-existing directory, create it.
531
532 Args:
533 path: path on local disk
534 """
commit-bot@chromium.org280ea822014-04-14 18:15:29 +0000535 try:
epoger@google.com9dddf6f2013-11-08 16:25:25 +0000536 os.makedirs(path)
commit-bot@chromium.org280ea822014-04-14 18:15:29 +0000537 except OSError as e:
538 if e.errno == errno.EEXIST:
539 pass
epoger@google.com9dddf6f2013-11-08 16:25:25 +0000540
commit-bot@chromium.org16f41802014-02-26 19:05:20 +0000541
def _sanitize_locator(locator):
    """Returns a sanitized version of a locator (one in which we know none of
    the characters will have special meaning in filenames).

    Every character matched by DISALLOWED_FILEPATH_CHAR_REGEX is replaced
    with an underscore.

    Args:
      locator: string, or something that can be represented as a string
    """
    locator_text = str(locator)
    return DISALLOWED_FILEPATH_CHAR_REGEX.sub('_', locator_text)
550
commit-bot@chromium.org16f41802014-02-26 19:05:20 +0000551
def _generate_key(expected_image_locator, actual_image_locator):
    """Returns a key suitable for looking up this image pair: a tuple of the
    sanitized expected locator and the sanitized actual locator.

    Args:
      expected_image_locator: a unique ID string under which we will store the
          expected image within storage_root (probably including a checksum to
          guarantee uniqueness)
      actual_image_locator: a unique ID string under which we will store the
          actual image within storage_root (probably including a checksum to
          guarantee uniqueness)
    """
    sanitized_expected = _sanitize_locator(expected_image_locator)
    sanitized_actual = _sanitize_locator(actual_image_locator)
    return (sanitized_expected, sanitized_actual)
565
566
def _get_difference_locator(expected_image_locator, actual_image_locator):
    """Returns the locator string used to look up the diffs between
    expected_image and actual_image.

    We must keep this function in sync with getImageDiffRelativeUrl() in
    static/loader.js

    Args:
      expected_image_locator: locator string pointing at expected image
      actual_image_locator: locator string pointing at actual image

    Returns: already-sanitized locator where the diffs between expected and
        actual images can be found
    """
    sanitized_expected = _sanitize_locator(expected_image_locator)
    sanitized_actual = _sanitize_locator(actual_image_locator)
    return "%s-vs-%s" % (sanitized_expected, sanitized_actual)