Blame - gm/rebaseline_server/imagediffdb.py - platform/external/skia

blob: 0bc75cfca4d9c9dce9a47759dddb8ec99aa6383a [file] [log] [blame]

epoger@google.com	9dddf6f	2013-11-08 16:25:25 +0000	[diff] [blame]	1	#!/usr/bin/python
				2
				3	"""
				4	Copyright 2013 Google Inc.
				5
				6	Use of this source code is governed by a BSD-style license that can be
				7	found in the LICENSE file.
				8
				9	Calculate differences between image pairs, and store them in a database.
				10	"""
				11
epoger	66ed8dc	2014-07-17 12:54:16 -0700	[diff] [blame]	12	# System-level imports
epoger@google.com	9dddf6f	2013-11-08 16:25:25 +0000	[diff] [blame]	13	import contextlib
epoger	0b71276	2014-08-05 10:07:22 -0700	[diff] [blame]	14	import errno
epoger	54f1ad8	2014-07-02 07:43:04 -0700	[diff] [blame]	15	import json
epoger@google.com	9dddf6f	2013-11-08 16:25:25 +0000	[diff] [blame]	16	import logging
				17	import os
epoger	0b71276	2014-08-05 10:07:22 -0700	[diff] [blame]	18	import Queue
commit-bot@chromium.org	9985ef5	2014-02-10 18:19:30 +0000	[diff] [blame]	19	import re
epoger@google.com	9dddf6f	2013-11-08 16:25:25 +0000	[diff] [blame]	20	import shutil
commit-bot@chromium.org	44546f8	2014-02-11 18:21:26 +0000	[diff] [blame]	21	import tempfile
epoger	0b71276	2014-08-05 10:07:22 -0700	[diff] [blame]	22	import threading
				23	import time
epoger@google.com	9dddf6f	2013-11-08 16:25:25 +0000	[diff] [blame]	24	import urllib
epoger@google.com	9dddf6f	2013-11-08 16:25:25 +0000	[diff] [blame]	25
epoger	66ed8dc	2014-07-17 12:54:16 -0700	[diff] [blame]	26	# Must fix up PYTHONPATH before importing from within Skia
stephana	3b5c86c	2014-08-18 13:37:59 -0700	[diff] [blame]	27	import rs_fixpypath # pylint: disable=W0611
epoger	66ed8dc	2014-07-17 12:54:16 -0700	[diff] [blame]	28
				29	# Imports from within Skia
commit-bot@chromium.org	44546f8	2014-02-11 18:21:26 +0000	[diff] [blame]	30	import find_run_binary
epoger	0b71276	2014-08-05 10:07:22 -0700	[diff] [blame]	31	from py.utils import gs_utils
				32
commit-bot@chromium.org	44546f8	2014-02-11 18:21:26 +0000	[diff] [blame]	33
# Path to the skpdiff program, as resolved by find_run_binary when this
# module is imported.
SKPDIFF_BINARY = find_run_binary.find_path_to_program('skpdiff')

# Defaults used by DiffRecord for the filename suffix of stored images and
# for the subdirectory of storage_root they are stored in.
DEFAULT_IMAGE_SUFFIX = '.png'
DEFAULT_IMAGES_SUBDIR = 'images'
# TODO(epoger): Figure out a better default number of threads; for now,
# using a conservative default value.
DEFAULT_NUM_WORKER_THREADS = 1
epoger@google.com	9dddf6f	2013-11-08 16:25:25 +0000	[diff] [blame]	41
# Matches every character that is neither a word character nor '-'.
# _sanitize_locator() replaces each match with '_'.
# Written as a raw string so that '\w' and '\-' reach the regex engine as-is
# instead of being (invalid) Python string escapes.
DISALLOWED_FILEPATH_CHAR_REGEX = re.compile(r'[^\w\-]')

# Subdirectories of storage_root that skpdiff writes its diff images into.
RGBDIFFS_SUBDIR = 'diffs'
WHITEDIFFS_SUBDIR = 'whitediffs'

# Keys used within DiffRecord dictionary representations.
# NOTE: Keep these in sync with static/constants.js
KEY__DIFFERENCES__MAX_DIFF_PER_CHANNEL = 'maxDiffPerChannel'
KEY__DIFFERENCES__NUM_DIFF_PIXELS = 'numDifferingPixels'
KEY__DIFFERENCES__PERCENT_DIFF_PIXELS = 'percentDifferingPixels'
KEY__DIFFERENCES__PERCEPTUAL_DIFF = 'perceptualDifference'
KEY__DIFFERENCES__DIFF_URL = 'diffUrl'
KEY__DIFFERENCES__WHITE_DIFF_URL = 'whiteDiffUrl'

# Special values within ImageDiffDB._diff_dict, stored in place of a
# DiffRecord while one is being computed or after computing it failed.
_DIFFRECORD_FAILED = 'failed'
_DIFFRECORD_PENDING = 'pending'

# How often to report tasks_queue size: the size is only logged when it is a
# multiple of this value (unless verbosity limiting is switched off).
QUEUE_LOGGING_GRANULARITY = 1000

# Temporary variable to keep track of how many times we download
# the same file in multiple threads.
# TODO(epoger): Delete this, once we see that the number stays close to 0.
global_file_collisions = 0
				67
epoger@google.com	9dddf6f	2013-11-08 16:25:25 +0000	[diff] [blame]	68
				69	class DiffRecord(object):
				70	""" Record of differences between two images. """
				71
epoger	0b71276	2014-08-05 10:07:22 -0700	[diff] [blame]	72	def __init__(self, gs, storage_root,
epoger@google.com	9dddf6f	2013-11-08 16:25:25 +0000	[diff] [blame]	73	expected_image_url, expected_image_locator,
rmistry@google.com	5861e52	2013-12-21 19:07:40 +0000	[diff] [blame]	74	actual_image_url, actual_image_locator,
				75	expected_images_subdir=DEFAULT_IMAGES_SUBDIR,
				76	actual_images_subdir=DEFAULT_IMAGES_SUBDIR,
				77	image_suffix=DEFAULT_IMAGE_SUFFIX):
epoger@google.com	9dddf6f	2013-11-08 16:25:25 +0000	[diff] [blame]	78	"""Download this pair of images (unless we already have them on local disk),
				79	and prepare a DiffRecord for them.
				80
epoger@google.com	9dddf6f	2013-11-08 16:25:25 +0000	[diff] [blame]	81	Args:
epoger	0b71276	2014-08-05 10:07:22 -0700	[diff] [blame]	82	gs: instance of GSUtils object we can use to download images
epoger@google.com	9dddf6f	2013-11-08 16:25:25 +0000	[diff] [blame]	83	storage_root: root directory on local disk within which we store all
				84	images
epoger	0b71276	2014-08-05 10:07:22 -0700	[diff] [blame]	85	expected_image_url: file, GS, or HTTP url from which we will download the
epoger@google.com	9dddf6f	2013-11-08 16:25:25 +0000	[diff] [blame]	86	expected image
				87	expected_image_locator: a unique ID string under which we will store the
				88	expected image within storage_root (probably including a checksum to
				89	guarantee uniqueness)
epoger	0b71276	2014-08-05 10:07:22 -0700	[diff] [blame]	90	actual_image_url: file, GS, or HTTP url from which we will download the
epoger@google.com	9dddf6f	2013-11-08 16:25:25 +0000	[diff] [blame]	91	actual image
				92	actual_image_locator: a unique ID string under which we will store the
				93	actual image within storage_root (probably including a checksum to
				94	guarantee uniqueness)
rmistry@google.com	5861e52	2013-12-21 19:07:40 +0000	[diff] [blame]	95	expected_images_subdir: the subdirectory expected images are stored in.
				96	actual_images_subdir: the subdirectory actual images are stored in.
				97	image_suffix: the suffix of images.
epoger@google.com	9dddf6f	2013-11-08 16:25:25 +0000	[diff] [blame]	98	"""
commit-bot@chromium.org	9985ef5	2014-02-10 18:19:30 +0000	[diff] [blame]	99	expected_image_locator = _sanitize_locator(expected_image_locator)
				100	actual_image_locator = _sanitize_locator(actual_image_locator)
				101
epoger@google.com	9dddf6f	2013-11-08 16:25:25 +0000	[diff] [blame]	102	# Download the expected/actual images, if we don't have them already.
commit-bot@chromium.org	8cc39a6	2014-03-04 16:46:22 +0000	[diff] [blame]	103	expected_image_file = os.path.join(
				104	storage_root, expected_images_subdir,
				105	str(expected_image_locator) + image_suffix)
				106	actual_image_file = os.path.join(
				107	storage_root, actual_images_subdir,
				108	str(actual_image_locator) + image_suffix)
epoger	3facc7c	2014-08-06 10:56:50 -0700	[diff] [blame]	109	for image_file, image_url in [
				110	(expected_image_file, expected_image_url),
				111	(actual_image_file, actual_image_url)]:
				112	if image_file and image_url:
				113	try:
				114	_download_file(gs, image_file, image_url)
				115	except Exception:
				116	logging.exception('unable to download image_url %s to file %s' %
				117	(image_url, image_file))
				118	raise
epoger@google.com	9dddf6f	2013-11-08 16:25:25 +0000	[diff] [blame]	119
epoger	3facc7c	2014-08-06 10:56:50 -0700	[diff] [blame]	120	# Return early if we do not need to generate diffs.
				121	if (expected_image_url == actual_image_url or
				122	not expected_image_url or not actual_image_url):
				123	return
				124
				125	# Get all diff images and values using the skpdiff binary.
epoger	54f1ad8	2014-07-02 07:43:04 -0700	[diff] [blame]	126	skpdiff_output_dir = tempfile.mkdtemp()
commit-bot@chromium.org	44546f8	2014-02-11 18:21:26 +0000	[diff] [blame]	127	try:
epoger	54f1ad8	2014-07-02 07:43:04 -0700	[diff] [blame]	128	skpdiff_summary_file = os.path.join(skpdiff_output_dir,
				129	'skpdiff-output.json')
stephana	21b342d	2014-08-13 10:36:06 -0700	[diff] [blame]	130	skpdiff_rgbdiff_dir = os.path.join(storage_root, RGBDIFFS_SUBDIR)
				131	skpdiff_whitediff_dir = os.path.join(storage_root, WHITEDIFFS_SUBDIR)
				132	_mkdir_unless_exists(skpdiff_rgbdiff_dir)
				133	_mkdir_unless_exists(skpdiff_rgbdiff_dir)
epoger	54f1ad8	2014-07-02 07:43:04 -0700	[diff] [blame]	134
epoger	0b71276	2014-08-05 10:07:22 -0700	[diff] [blame]	135	# TODO(epoger): Consider calling skpdiff ONCE for all image pairs,
				136	# instead of calling it separately for each image pair.
				137	# Pro: we'll incur less overhead from making repeated system calls,
				138	# spinning up the skpdiff binary, etc.
				139	# Con: we would have to wait until all image pairs were loaded before
				140	# generating any of the diffs?
stephana	21b342d	2014-08-13 10:36:06 -0700	[diff] [blame]	141	# Note(stephana): '--longnames' was added to allow for this
				142	# case (multiple files at once) versus specifying output diffs
				143	# directly.
commit-bot@chromium.org	44546f8	2014-02-11 18:21:26 +0000	[diff] [blame]	144	find_run_binary.run_command(
epoger	3facc7c	2014-08-06 10:56:50 -0700	[diff] [blame]	145	[SKPDIFF_BINARY, '-p', expected_image_file, actual_image_file,
epoger	54f1ad8	2014-07-02 07:43:04 -0700	[diff] [blame]	146	'--jsonp', 'false',
stephana	21b342d	2014-08-13 10:36:06 -0700	[diff] [blame]	147	'--longnames', 'true',
epoger	54f1ad8	2014-07-02 07:43:04 -0700	[diff] [blame]	148	'--output', skpdiff_summary_file,
				149	'--differs', 'perceptual', 'different_pixels',
				150	'--rgbDiffDir', skpdiff_rgbdiff_dir,
				151	'--whiteDiffDir', skpdiff_whitediff_dir,
				152	])
				153
				154	# Get information out of the skpdiff_summary_file.
				155	with contextlib.closing(open(skpdiff_summary_file)) as fp:
				156	data = json.load(fp)
				157
				158	# For now, we can assume there is only one record in the output summary,
				159	# since we passed skpdiff only one pair of images.
				160	record = data['records'][0]
				161	self._width = record['width']
				162	self._height = record['height']
stephana	a1aa5c2	2014-08-15 06:53:23 -0700	[diff] [blame]	163	self._diffUrl = os.path.split(record['rgbDiffPath'])[1]
				164	self._whiteDiffUrl = os.path.split(record['whiteDiffPath'])[1]
				165
epoger	54f1ad8	2014-07-02 07:43:04 -0700	[diff] [blame]	166	# TODO: make max_diff_per_channel a tuple instead of a list, because the
				167	# structure is meaningful (first element is red, second is green, etc.)
				168	# See http://stackoverflow.com/a/626871
				169	self._max_diff_per_channel = [
				170	record['maxRedDiff'], record['maxGreenDiff'], record['maxBlueDiff']]
epoger	54f1ad8	2014-07-02 07:43:04 -0700	[diff] [blame]	171	per_differ_stats = record['diffs']
				172	for stats in per_differ_stats:
				173	differ_name = stats['differName']
				174	if differ_name == 'different_pixels':
				175	self._num_pixels_differing = stats['pointsOfInterest']
				176	elif differ_name == 'perceptual':
				177	perceptual_similarity = stats['result']
				178
				179	# skpdiff returns the perceptual similarity; convert it to get the
				180	# perceptual difference percentage.
				181	# skpdiff outputs -1 if the images are different sizes. Treat any
				182	# output that does not lie in [0, 1] as having 0% perceptual
				183	# similarity.
				184	if not 0 <= perceptual_similarity <= 1:
				185	perceptual_similarity = 0
				186	self._perceptual_difference = 100 - (perceptual_similarity * 100)
commit-bot@chromium.org	44546f8	2014-02-11 18:21:26 +0000	[diff] [blame]	187	finally:
epoger	54f1ad8	2014-07-02 07:43:04 -0700	[diff] [blame]	188	shutil.rmtree(skpdiff_output_dir)
epoger@google.com	9dddf6f	2013-11-08 16:25:25 +0000	[diff] [blame]	189
epoger	6132b43	2014-07-09 07:59:06 -0700	[diff] [blame]	190	# TODO(epoger): Use properties instead of getters throughout.
				191	# See http://stackoverflow.com/a/6618176
epoger@google.com	9dddf6f	2013-11-08 16:25:25 +0000	[diff] [blame]	192	def get_num_pixels_differing(self):
				193	"""Returns the absolute number of pixels that differ."""
				194	return self._num_pixels_differing
				195
				196	def get_percent_pixels_differing(self):
				197	"""Returns the percentage of pixels that differ, as a float between
				198	0 and 100 (inclusive)."""
				199	return ((float(self._num_pixels_differing) * 100) /
				200	(self._width * self._height))
				201
commit-bot@chromium.org	44546f8	2014-02-11 18:21:26 +0000	[diff] [blame]	202	def get_perceptual_difference(self):
				203	"""Returns the perceptual difference percentage."""
				204	return self._perceptual_difference
				205
epoger@google.com	214a024	2013-11-22 19:26:18 +0000	[diff] [blame]	206	def get_max_diff_per_channel(self):
				207	"""Returns the maximum difference between the expected and actual images
				208	for each R/G/B channel, as a list."""
				209	return self._max_diff_per_channel
				210
commit-bot@chromium.org	9985ef5	2014-02-10 18:19:30 +0000	[diff] [blame]	211	def as_dict(self):
				212	"""Returns a dictionary representation of this DiffRecord, as needed when
				213	constructing the JSON representation."""
				214	return {
commit-bot@chromium.org	68a3815	2014-05-12 20:40:29 +0000	[diff] [blame]	215	KEY__DIFFERENCES__NUM_DIFF_PIXELS: self._num_pixels_differing,
				216	KEY__DIFFERENCES__PERCENT_DIFF_PIXELS:
commit-bot@chromium.org	16f4180	2014-02-26 19:05:20 +0000	[diff] [blame]	217	self.get_percent_pixels_differing(),
commit-bot@chromium.org	68a3815	2014-05-12 20:40:29 +0000	[diff] [blame]	218	KEY__DIFFERENCES__MAX_DIFF_PER_CHANNEL: self._max_diff_per_channel,
				219	KEY__DIFFERENCES__PERCEPTUAL_DIFF: self._perceptual_difference,
stephana	a1aa5c2	2014-08-15 06:53:23 -0700	[diff] [blame]	220	KEY__DIFFERENCES__DIFF_URL: self._diffUrl,
				221	KEY__DIFFERENCES__WHITE_DIFF_URL: self._whiteDiffUrl,
commit-bot@chromium.org	9985ef5	2014-02-10 18:19:30 +0000	[diff] [blame]	222	}
				223
epoger@google.com	9dddf6f	2013-11-08 16:25:25 +0000	[diff] [blame]	224
stephana	a1aa5c2	2014-08-15 06:53:23 -0700	[diff] [blame]	225
class ImageDiffDB(object):
    """Calculates differences between image pairs, maintaining a database
    of them for download.

    Diffs are computed in background worker threads; results are kept in an
    in-memory dictionary keyed by the (sanitized) locator pair.
    """

    def __init__(self, storage_root, gs=None,
                 num_worker_threads=DEFAULT_NUM_WORKER_THREADS):
        """
        Args:
          storage_root: string; root path within which the DB stores all of
              its files
          gs: instance of GSUtils object we can use to download images
          num_worker_threads: how many threads download images and generate
              diffs simultaneously
        """
        self._storage_root = storage_root
        self._gs = gs

        # State for the periodic queue-size report, plus the lock guarding it.
        self._last_queue_size_reported = None
        self._queue_size_report_lock = threading.RLock()

        # Maps (expected_image_locator, actual_image_locator) tuples to a
        # DiffRecord, or to _DIFFRECORD_PENDING / _DIFFRECORD_FAILED.
        #
        # Every thread writing to _diff_dict must hold _diff_dict_writelock!
        #
        # TODO(epoger): Disk is limitless, but RAM is not... so, we should
        # probably evict entries that have not been accessed for a long time.
        # They can always be regenerated by diffing the images already on
        # local disk. If diffing turns out to be expensive too, the entries
        # could be written out to disk instead (or left to virtual memory).
        self._diff_dict = {}
        self._diff_dict_writelock = threading.RLock()

        # Unbounded queue (maxsize=0) so that add_image_pair() never blocks;
        # the daemon threads started below consume from it.
        self._tasks_queue = Queue.Queue(maxsize=0)
        self._workers = []
        for worker_num in range(num_worker_threads):
            thread = threading.Thread(target=self.worker, args=(worker_num,))
            thread.daemon = True
            thread.start()
            self._workers.append(thread)

    def log_queue_size_if_changed(self, limit_verbosity=True):
        """Log the size of self._tasks_queue, if it differs from the size
        that was logged last.

        Args:
          limit_verbosity: if True, only log if the queue size is a multiple
              of QUEUE_LOGGING_GRANULARITY
        """
        # The lock serializes access to self._last_queue_size_reported.
        with self._queue_size_report_lock:
            size = self._tasks_queue.qsize()
            if size == self._last_queue_size_reported:
                return
            if limit_verbosity and size % QUEUE_LOGGING_GRANULARITY:
                return
            logging.info('tasks_queue size is %d' % size)
            self._last_queue_size_reported = size

    def worker(self, worker_num):
        """Body of a worker thread: forever pull tasks off self._tasks_queue
        and store the outcome of each in self._diff_dict.

        Args:
          worker_num: (integer) which worker this is
        """
        while True:
            self.log_queue_size_if_changed()
            key, expected_image_url, actual_image_url = (
                self._tasks_queue.get())
            try:
                outcome = DiffRecord(
                    self._gs, self._storage_root,
                    expected_image_url=expected_image_url,
                    expected_image_locator=key[0],
                    actual_image_url=actual_image_url,
                    actual_image_locator=key[1])
            except Exception:
                # Record the failure instead of letting the thread die.
                logging.exception(
                    'exception while creating DiffRecord for key %s' %
                    str(key))
                outcome = _DIFFRECORD_FAILED
            with self._diff_dict_writelock:
                self._diff_dict[key] = outcome

    @property
    def storage_root(self):
        return self._storage_root

    def add_image_pair(self,
                       expected_image_url, expected_image_locator,
                       actual_image_url, actual_image_locator):
        """Asynchronously prepare a DiffRecord for a pair of images.

        Returns quickly; get_diff_record() is what blocks until the
        DiffRecord is available (or we have given up on creating it).

        A pair that has been requested before is not queued again.

        If expected_image_url (or its locator) is None, just download
        actual_image.
        If actual_image_url (or its locator) is None, just download
        expected_image.

        Args:
          expected_image_url: file, GS, or HTTP url from which we will
              download the expected image
          expected_image_locator: a unique ID string under which we will
              store the expected image within storage_root (probably
              including a checksum to guarantee uniqueness)
          actual_image_url: file, GS, or HTTP url from which we will download
              the actual image
          actual_image_locator: a unique ID string under which we will store
              the actual image within storage_root (probably including a
              checksum to guarantee uniqueness)
        """
        key = (_sanitize_locator(expected_image_locator),
               _sanitize_locator(actual_image_locator))

        # Claim the key while holding the lock; enqueue after releasing it.
        with self._diff_dict_writelock:
            is_new_request = key not in self._diff_dict
            if is_new_request:
                self._diff_dict[key] = _DIFFRECORD_PENDING

        if is_new_request:
            self._tasks_queue.put((key, expected_image_url, actual_image_url))
            self.log_queue_size_if_changed()

    def get_diff_record(self, expected_image_locator, actual_image_locator):
        """Returns the DiffRecord for this image pair.

        Blocks (polling once per second) until the diff record is available,
        or we were unable to generate it.

        Args:
          expected_image_locator: a unique ID string under which we will
              store the expected image within storage_root (probably
              including a checksum to guarantee uniqueness)
          actual_image_locator: a unique ID string under which we will store
              the actual image within storage_root (probably including a
              checksum to guarantee uniqueness)

        Returns the DiffRecord for this image pair, or None if we were unable
        to generate one.
        """
        key = (_sanitize_locator(expected_image_locator),
               _sanitize_locator(actual_image_locator))

        # Poll until a worker has replaced the pending marker.
        while True:
            diff_record = self._diff_dict[key]
            if diff_record == _DIFFRECORD_PENDING:
                time.sleep(1)
            else:
                break

        if diff_record == _DIFFRECORD_FAILED:
            logging.error(
                'failed to create a DiffRecord for expected_image_locator=%s , '
                'actual_image_locator=%s' % (
                    expected_image_locator, actual_image_locator))
            return None
        return diff_record
epoger@google.com	9dddf6f	2013-11-08 16:25:25 +0000	[diff] [blame]	410
				411
				412	# Utility functions
				413
def _download_file(gs, local_filepath, url):
    """Download a file from url to local_filepath, unless it is already there.

    The contents are first written to a per-thread temporary file next to
    local_filepath and then renamed into place. The temporary file is removed
    again if the download fails or if another thread got there first.

    Args:
      gs: instance of GSUtils object, in case the url points at Google Storage
      local_filepath: path on local disk where the image should be stored
      url: HTTP or GS URL from which we can download the image if we don't
          have it yet

    Raises:
      Whatever the underlying download, file, or rename operation raises.
    """
    global global_file_collisions
    if os.path.exists(local_filepath):
        return
    _mkdir_unless_exists(os.path.dirname(local_filepath))

    # First download the file contents into a unique filename, and
    # then rename that file. That way, if multiple threads are downloading
    # the same filename at the same time, they won't interfere with each
    # other (they will both download the file, and one will "win" in the end)
    temp_filename = '%s-%d' % (local_filepath,
                               threading.current_thread().ident)
    try:
        if gs_utils.GSUtils.is_gs_url(url):
            (bucket, path) = gs_utils.GSUtils.split_gs_url(url)
            gs.download_file(source_bucket=bucket, source_path=path,
                             dest_path=temp_filename)
        else:
            with contextlib.closing(urllib.urlopen(url)) as url_handle:
                with open(temp_filename, 'wb') as file_handle:
                    shutil.copyfileobj(fsrc=url_handle, fdst=file_handle)

        # Rename the file to its real filename.
        # Keep count of how many colliding downloads we encounter;
        # if it's a large number, we may want to change our download strategy
        # to minimize repeated downloads.
        if os.path.exists(local_filepath):
            global_file_collisions += 1
        else:
            os.rename(temp_filename, local_filepath)
    finally:
        # Don't leave the temporary file behind after a lost race or a failed
        # download. After a successful rename it no longer exists, which is
        # the expected ENOENT case.
        try:
            os.remove(temp_filename)
        except OSError as e:
            if e.errno != errno.ENOENT:
                raise
epoger@google.com	214a024	2013-11-22 19:26:18 +0000	[diff] [blame]	450
commit-bot@chromium.org	16f4180	2014-02-26 19:05:20 +0000	[diff] [blame]	451
def _mkdir_unless_exists(path):
    """Unless path refers to an already-existing directory, create it.

    Missing parent directories are created as well. An empty path is treated
    as "nothing to create".

    Args:
      path: path on local disk

    Raises:
      OSError: if the directory could not be created, including the case
          where path already exists but is not a directory.
    """
    if not path:
        return
    try:
        os.makedirs(path)
    except OSError as e:
        # Losing a creation race against another thread is fine; any other
        # failure (permissions, a regular file in the way, ...) is not.
        if e.errno != errno.EEXIST or not os.path.isdir(path):
            raise
epoger@google.com	9dddf6f	2013-11-08 16:25:25 +0000	[diff] [blame]	463
commit-bot@chromium.org	16f4180	2014-02-26 19:05:20 +0000	[diff] [blame]	464
def _sanitize_locator(locator):
    """Make a locator safe to use as part of a filename.

    Every character matched by DISALLOWED_FILEPATH_CHAR_REGEX is replaced
    with an underscore.

    Args:
      locator: string, or something that can be represented as a string.
          Falsy values such as None or '' are handed back untouched, because
          empty locators have a particular meaning ("there is no image for
          this")

    Returns:
      The sanitized string, or the original locator if it was falsy.
    """
    if not locator:
        return locator
    as_text = str(locator)
    return DISALLOWED_FILEPATH_CHAR_REGEX.sub('_', as_text)