Blame - gm/rebaseline_server/imagediffdb.py - fp2-dev/platform/external/skia

blob: 936301e1cdef393e736ad852283768482037a63c [file] [log] [blame]

epoger@google.com	9dddf6f	2013-11-08 16:25:25 +0000	[diff] [blame]	1	#!/usr/bin/python
				2
				3	"""
				4	Copyright 2013 Google Inc.
				5
				6	Use of this source code is governed by a BSD-style license that can be
				7	found in the LICENSE file.
				8
				9	Calculate differences between image pairs, and store them in a database.
				10	"""
				11
				12	import contextlib
				13	import logging
				14	import os
commit-bot@chromium.org	9985ef5	2014-02-10 18:19:30 +0000	[diff] [blame^]	15	import re
epoger@google.com	9dddf6f	2013-11-08 16:25:25 +0000	[diff] [blame]	16	import shutil
				17	import urllib
				18	try:
				19	from PIL import Image, ImageChops
				20	except ImportError:
				21	raise ImportError('Requires PIL to be installed; see '
				22	+ 'http://www.pythonware.com/products/pil/')
				23
# Defaults for the subdirectory of storage_root that holds downloaded images,
# and for the filename suffix appended to each image locator.
DEFAULT_IMAGE_SUFFIX = '.png'
DEFAULT_IMAGES_SUBDIR = 'images'

# Matches any character that is neither a regex "word" character (\w) nor a
# hyphen; _sanitize_locator() replaces every match with an underscore.
# Written as a raw string so the backslashes reach the regex engine untouched
# instead of depending on Python leaving unknown string escapes alone.
DISALLOWED_FILEPATH_CHAR_REGEX = re.compile(r'[^\w\-]')

# Subdirectories of storage_root into which generated diff images are written.
DIFFS_SUBDIR = 'diffs'
WHITEDIFFS_SUBDIR = 'whitediffs'

# Number of histogram buckets per color band.
VALUES_PER_BAND = 256
				33
epoger@google.com	9dddf6f	2013-11-08 16:25:25 +0000	[diff] [blame]	34
				35	class DiffRecord(object):
				36	""" Record of differences between two images. """
				37
				38	def __init__(self, storage_root,
				39	expected_image_url, expected_image_locator,
rmistry@google.com	5861e52	2013-12-21 19:07:40 +0000	[diff] [blame]	40	actual_image_url, actual_image_locator,
				41	expected_images_subdir=DEFAULT_IMAGES_SUBDIR,
				42	actual_images_subdir=DEFAULT_IMAGES_SUBDIR,
				43	image_suffix=DEFAULT_IMAGE_SUFFIX):
epoger@google.com	9dddf6f	2013-11-08 16:25:25 +0000	[diff] [blame]	44	"""Download this pair of images (unless we already have them on local disk),
				45	and prepare a DiffRecord for them.
				46
				47	TODO(epoger): Make this asynchronously download images, rather than blocking
				48	until the images have been downloaded and processed.
				49
				50	Args:
				51	storage_root: root directory on local disk within which we store all
				52	images
				53	expected_image_url: file or HTTP url from which we will download the
				54	expected image
				55	expected_image_locator: a unique ID string under which we will store the
				56	expected image within storage_root (probably including a checksum to
				57	guarantee uniqueness)
				58	actual_image_url: file or HTTP url from which we will download the
				59	actual image
				60	actual_image_locator: a unique ID string under which we will store the
				61	actual image within storage_root (probably including a checksum to
				62	guarantee uniqueness)
rmistry@google.com	5861e52	2013-12-21 19:07:40 +0000	[diff] [blame]	63	expected_images_subdir: the subdirectory expected images are stored in.
				64	actual_images_subdir: the subdirectory actual images are stored in.
				65	image_suffix: the suffix of images.
epoger@google.com	9dddf6f	2013-11-08 16:25:25 +0000	[diff] [blame]	66	"""
commit-bot@chromium.org	9985ef5	2014-02-10 18:19:30 +0000	[diff] [blame^]	67	expected_image_locator = _sanitize_locator(expected_image_locator)
				68	actual_image_locator = _sanitize_locator(actual_image_locator)
				69
epoger@google.com	9dddf6f	2013-11-08 16:25:25 +0000	[diff] [blame]	70	# Download the expected/actual images, if we don't have them already.
rmistry@google.com	5861e52	2013-12-21 19:07:40 +0000	[diff] [blame]	71	# TODO(rmistry): Add a parameter that makes _download_and_open_image raise
				72	# an exception if images are not found locally (instead of trying to
				73	# download them).
epoger@google.com	9dddf6f	2013-11-08 16:25:25 +0000	[diff] [blame]	74	expected_image = _download_and_open_image(
rmistry@google.com	5861e52	2013-12-21 19:07:40 +0000	[diff] [blame]	75	os.path.join(storage_root, expected_images_subdir,
				76	str(expected_image_locator) + image_suffix),
epoger@google.com	9dddf6f	2013-11-08 16:25:25 +0000	[diff] [blame]	77	expected_image_url)
				78	actual_image = _download_and_open_image(
rmistry@google.com	5861e52	2013-12-21 19:07:40 +0000	[diff] [blame]	79	os.path.join(storage_root, actual_images_subdir,
				80	str(actual_image_locator) + image_suffix),
epoger@google.com	9dddf6f	2013-11-08 16:25:25 +0000	[diff] [blame]	81	actual_image_url)
				82
epoger@google.com	214a024	2013-11-22 19:26:18 +0000	[diff] [blame]	83	# Generate the diff image (absolute diff at each pixel) and
				84	# max_diff_per_channel.
epoger@google.com	9dddf6f	2013-11-08 16:25:25 +0000	[diff] [blame]	85	diff_image = _generate_image_diff(actual_image, expected_image)
epoger@google.com	214a024	2013-11-22 19:26:18 +0000	[diff] [blame]	86	diff_histogram = diff_image.histogram()
				87	(diff_width, diff_height) = diff_image.size
				88	self._weighted_diff_measure = _calculate_weighted_diff_metric(
				89	diff_histogram, diff_width * diff_height)
				90	self._max_diff_per_channel = _max_per_band(diff_histogram)
				91
				92	# Generate the whitediff image (any differing pixels show as white).
				93	# This is tricky, because when you convert color images to grayscale or
				94	# black & white in PIL, it has its own ideas about thresholds.
				95	# We have to force it: if a pixel has any color at all, it's a '1'.
				96	bands = diff_image.split()
				97	graydiff_image = ImageChops.lighter(ImageChops.lighter(
				98	bands[0], bands[1]), bands[2])
				99	whitediff_image = (graydiff_image.point(lambda p: p > 0 and VALUES_PER_BAND)
				100	.convert('1', dither=Image.NONE))
				101
				102	# Final touches on diff_image: use whitediff_image as an alpha mask.
				103	# Unchanged pixels are transparent; differing pixels are opaque.
				104	diff_image.putalpha(whitediff_image)
				105
				106	# Store the diff and whitediff images generated above.
epoger@google.com	9dddf6f	2013-11-08 16:25:25 +0000	[diff] [blame]	107	diff_image_locator = _get_difference_locator(
				108	expected_image_locator=expected_image_locator,
				109	actual_image_locator=actual_image_locator)
rmistry@google.com	5861e52	2013-12-21 19:07:40 +0000	[diff] [blame]	110	basename = str(diff_image_locator) + image_suffix
epoger@google.com	214a024	2013-11-22 19:26:18 +0000	[diff] [blame]	111	_save_image(diff_image, os.path.join(
				112	storage_root, DIFFS_SUBDIR, basename))
				113	_save_image(whitediff_image, os.path.join(
				114	storage_root, WHITEDIFFS_SUBDIR, basename))
epoger@google.com	9dddf6f	2013-11-08 16:25:25 +0000	[diff] [blame]	115
				116	# Calculate difference metrics.
				117	(self._width, self._height) = diff_image.size
epoger@google.com	214a024	2013-11-22 19:26:18 +0000	[diff] [blame]	118	self._num_pixels_differing = (
				119	whitediff_image.histogram()[VALUES_PER_BAND - 1])
epoger@google.com	9dddf6f	2013-11-08 16:25:25 +0000	[diff] [blame]	120
				121	def get_num_pixels_differing(self):
				122	"""Returns the absolute number of pixels that differ."""
				123	return self._num_pixels_differing
				124
				125	def get_percent_pixels_differing(self):
				126	"""Returns the percentage of pixels that differ, as a float between
				127	0 and 100 (inclusive)."""
				128	return ((float(self._num_pixels_differing) * 100) /
				129	(self._width * self._height))
				130
				131	def get_weighted_diff_measure(self):
				132	"""Returns a weighted measure of image diffs, as a float between 0 and 100
				133	(inclusive)."""
				134	return self._weighted_diff_measure
				135
epoger@google.com	214a024	2013-11-22 19:26:18 +0000	[diff] [blame]	136	def get_max_diff_per_channel(self):
				137	"""Returns the maximum difference between the expected and actual images
				138	for each R/G/B channel, as a list."""
				139	return self._max_diff_per_channel
				140
commit-bot@chromium.org	9985ef5	2014-02-10 18:19:30 +0000	[diff] [blame^]	141	def as_dict(self):
				142	"""Returns a dictionary representation of this DiffRecord, as needed when
				143	constructing the JSON representation."""
				144	return {
				145	'numDifferingPixels': self._num_pixels_differing,
				146	'percentDifferingPixels': self.get_percent_pixels_differing(),
				147	'weightedDiffMeasure': self.get_weighted_diff_measure(),
				148	'maxDiffPerChannel': self._max_diff_per_channel,
				149	}
				150
epoger@google.com	9dddf6f	2013-11-08 16:25:25 +0000	[diff] [blame]	151
				152	class ImageDiffDB(object):
				153	""" Calculates differences between image pairs, maintaining a database of
				154	them for download."""
				155
				156	def __init__(self, storage_root):
				157	"""
				158	Args:
				159	storage_root: string; root path within the DB will store all of its stuff
				160	"""
				161	self._storage_root = storage_root
				162
				163	# Dictionary of DiffRecords, keyed by (expected_image_locator,
				164	# actual_image_locator) tuples.
				165	self._diff_dict = {}
				166
				167	def add_image_pair(self,
				168	expected_image_url, expected_image_locator,
				169	actual_image_url, actual_image_locator):
				170	"""Download this pair of images (unless we already have them on local disk),
				171	and prepare a DiffRecord for them.
				172
				173	TODO(epoger): Make this asynchronously download images, rather than blocking
				174	until the images have been downloaded and processed.
				175	When we do that, we should probably add a new method that will block
				176	until all of the images have been downloaded and processed. Otherwise,
				177	we won't know when it's safe to start calling get_diff_record().
				178	jcgregorio notes: maybe just make ImageDiffDB thread-safe and create a
				179	thread-pool/worker queue at a higher level that just uses ImageDiffDB?
				180
				181	Args:
				182	expected_image_url: file or HTTP url from which we will download the
				183	expected image
				184	expected_image_locator: a unique ID string under which we will store the
				185	expected image within storage_root (probably including a checksum to
				186	guarantee uniqueness)
				187	actual_image_url: file or HTTP url from which we will download the
				188	actual image
				189	actual_image_locator: a unique ID string under which we will store the
				190	actual image within storage_root (probably including a checksum to
				191	guarantee uniqueness)
				192	"""
commit-bot@chromium.org	9985ef5	2014-02-10 18:19:30 +0000	[diff] [blame^]	193	expected_image_locator = _sanitize_locator(expected_image_locator)
				194	actual_image_locator = _sanitize_locator(actual_image_locator)
epoger@google.com	9dddf6f	2013-11-08 16:25:25 +0000	[diff] [blame]	195	key = (expected_image_locator, actual_image_locator)
				196	if not key in self._diff_dict:
				197	try:
				198	new_diff_record = DiffRecord(
				199	self._storage_root,
				200	expected_image_url=expected_image_url,
				201	expected_image_locator=expected_image_locator,
				202	actual_image_url=actual_image_url,
				203	actual_image_locator=actual_image_locator)
commit-bot@chromium.org	a47e7ac	2013-12-19 20:01:34 +0000	[diff] [blame]	204	except Exception:
epoger@google.com	9dddf6f	2013-11-08 16:25:25 +0000	[diff] [blame]	205	logging.exception('got exception while creating new DiffRecord')
				206	return
				207	self._diff_dict[key] = new_diff_record
				208
				209	def get_diff_record(self, expected_image_locator, actual_image_locator):
				210	"""Returns the DiffRecord for this image pair.
				211
				212	Raises a KeyError if we don't have a DiffRecord for this image pair.
				213	"""
commit-bot@chromium.org	9985ef5	2014-02-10 18:19:30 +0000	[diff] [blame^]	214	key = (_sanitize_locator(expected_image_locator),
				215	_sanitize_locator(actual_image_locator))
epoger@google.com	9dddf6f	2013-11-08 16:25:25 +0000	[diff] [blame]	216	return self._diff_dict[key]
				217
				218
				219	# Utility functions
				220
def _calculate_weighted_diff_metric(histogram, num_pixels):
    """Given the histogram of a diff image (per-channel diff at each
    pixel between two images), calculate the weighted diff metric (a
    stab at how different the two images really are).

    Args:
      histogram: PIL histogram of a per-channel diff between two images; its
          length must be a multiple of VALUES_PER_BAND
      num_pixels: integer; the total number of pixels in the diff image

    Returns: a weighted diff metric, as a float between 0 and 100
        (inclusive) when the histogram describes num_pixels pixels.

    Raises:
      ZeroDivisionError: if num_pixels is 0 or the histogram is empty.
    """
    # TODO(epoger): As a wild guess at an appropriate metric, weight each
    # different pixel by the square of its delta value.  (The more different
    # a pixel is from its expectation, the more we care about it.)
    # In the long term, we will probably use some metric generated by
    # skpdiff anyway.
    assert len(histogram) % VALUES_PER_BAND == 0
    # Floor division keeps the band count an integer even where "/" means
    # true division.
    num_bands = len(histogram) // VALUES_PER_BAND
    # Largest possible total: every pixel maximally different in every band.
    max_diff = num_pixels * num_bands * (VALUES_PER_BAND - 1)**2
    # index % VALUES_PER_BAND is the delta value that a bucket stands for,
    # whichever band the bucket belongs to.
    total_diff = sum(
        count * (index % VALUES_PER_BAND)**2
        for index, count in enumerate(histogram))
    return float(100 * total_diff) / max_diff
				244
				245	def _max_per_band(histogram):
				246	"""Given the histogram of an image, return the maximum value of each band
				247	(a.k.a. "color channel", such as R/G/B) across the entire image.
				248
				249	Args:
				250	histogram: PIL histogram
				251
				252	Returns the maximum value of each band within the image histogram, as a list.
				253	"""
				254	max_per_band = []
				255	assert(len(histogram) % VALUES_PER_BAND == 0)
				256	num_bands = len(histogram) / VALUES_PER_BAND
				257	for band in xrange(num_bands):
				258	# Assuming that VALUES_PER_BAND is 256...
				259	# the 'R' band makes up indices 0-255 in the histogram,
				260	# the 'G' band makes up indices 256-511 in the histogram,
				261	# etc.
				262	min_index = band * VALUES_PER_BAND
				263	index = min_index + VALUES_PER_BAND
				264	while index > min_index:
				265	index -= 1
				266	if histogram[index] > 0:
				267	max_per_band.append(index - min_index)
				268	break
				269	return max_per_band
epoger@google.com	9dddf6f	2013-11-08 16:25:25 +0000	[diff] [blame]	270
				271	def _generate_image_diff(image1, image2):
				272	"""Wrapper for ImageChops.difference(image1, image2) that will handle some
				273	errors automatically, or at least yield more useful error messages.
				274
				275	TODO(epoger): Currently, some of the images generated by the bots are RGBA
				276	and others are RGB. I'm not sure why that is. For now, to avoid confusion
				277	within the UI, convert all to RGB when diffing.
				278
				279	Args:
				280	image1: a PIL image object
				281	image2: a PIL image object
				282
				283	Returns: per-pixel diffs between image1 and image2, as a PIL image object
				284	"""
				285	try:
				286	return ImageChops.difference(image1.convert('RGB'), image2.convert('RGB'))
				287	except ValueError:
				288	logging.error('Error diffing image1 [%s] and image2 [%s].' % (
				289	repr(image1), repr(image2)))
				290	raise
				291
				292	def _download_and_open_image(local_filepath, url):
				293	"""Open the image at local_filepath; if there is no file at that path,
				294	download it from url to that path and then open it.
				295
				296	Args:
				297	local_filepath: path on local disk where the image should be stored
				298	url: URL from which we can download the image if we don't have it yet
				299
				300	Returns: a PIL image object
				301	"""
				302	if not os.path.exists(local_filepath):
				303	_mkdir_unless_exists(os.path.dirname(local_filepath))
				304	with contextlib.closing(urllib.urlopen(url)) as url_handle:
				305	with open(local_filepath, 'wb') as file_handle:
				306	shutil.copyfileobj(fsrc=url_handle, fdst=file_handle)
				307	return _open_image(local_filepath)
				308
				309	def _open_image(filepath):
				310	"""Wrapper for Image.open(filepath) that yields more useful error messages.
				311
				312	Args:
				313	filepath: path on local disk to load image from
				314
				315	Returns: a PIL image object
				316	"""
				317	try:
				318	return Image.open(filepath)
				319	except IOError:
				320	logging.error('IOError loading image file %s' % filepath)
				321	raise
				322
def _save_image(image, filepath, format='PNG'):
    """Write an image to disk, first creating the directory that will hold it
    (and any missing parents).

    Args:
      image: a PIL image object
      filepath: path on local disk to write image to
      format: one of the PIL image formats, listed at
          http://effbot.org/imagingbook/formats.htm
    """
    parent_dir = os.path.dirname(filepath)
    _mkdir_unless_exists(parent_dir)
    image.save(filepath, format)
				334
epoger@google.com	9dddf6f	2013-11-08 16:25:25 +0000	[diff] [blame]	335	def _mkdir_unless_exists(path):
				336	"""Unless path refers to an already-existing directory, create it.
				337
				338	Args:
				339	path: path on local disk
				340	"""
				341	if not os.path.isdir(path):
				342	os.makedirs(path)
				343
def _sanitize_locator(locator):
    """Returns a sanitized version of a locator: every character matched by
    DISALLOWED_FILEPATH_CHAR_REGEX is replaced with an underscore, so that
    none of the remaining characters has special meaning in filenames.

    Args:
      locator: string, or something that can be represented as a string
    """
    locator_text = str(locator)
    return DISALLOWED_FILEPATH_CHAR_REGEX.sub('_', locator_text)
				352
def _get_difference_locator(expected_image_locator, actual_image_locator):
    """Returns the locator string used to look up the diffs between
    expected_image and actual_image.

    Args:
      expected_image_locator: locator string pointing at expected image
      actual_image_locator: locator string pointing at actual image

    Returns: already-sanitized locator where the diffs between expected and
        actual images can be found
    """
    sanitized_expected = _sanitize_locator(expected_image_locator)
    sanitized_actual = _sanitize_locator(actual_image_locator)
    return "%s-vs-%s" % (sanitized_expected, sanitized_actual)