Blame - gm/rebaseline_server/imagediffdb.py - fp2-dev/platform/external/skia

blob: f604cfb8c3b79b9c82a0ee68657acbc3ac5e10a7 [file] [log] [blame]

epoger@google.com	9dddf6f	2013-11-08 16:25:25 +0000	[diff] [blame]	1	#!/usr/bin/python
				2
				3	"""
				4	Copyright 2013 Google Inc.
				5
				6	Use of this source code is governed by a BSD-style license that can be
				7	found in the LICENSE file.
				8
				9	Calculate differences between image pairs, and store them in a database.
				10	"""
				11
				12	import contextlib
				13	import logging
				14	import os
				15	import shutil
				16	import urllib
				17	try:
				18	from PIL import Image, ImageChops
				19	except ImportError:
				20	raise ImportError('Requires PIL to be installed; see '
				21	+ 'http://www.pythonware.com/products/pil/')
				22
# Filename suffix appended to every image/diff locator when building the
# on-disk path of a stored image.
IMAGE_SUFFIX = '.png'

# Names of the subdirectories (under the storage root) that hold the
# downloaded images, the generated diff images, and the generated whitediff
# images, respectively.
IMAGES_SUBDIR = 'images'
DIFFS_SUBDIR = 'diffs'
WHITEDIFFS_SUBDIR = 'whitediffs'

# Number of histogram entries per band (color channel); the histogram helpers
# in this file use it to split a flat histogram into per-band slices.
VALUES_PER_BAND = 256
				30
epoger@google.com	9dddf6f	2013-11-08 16:25:25 +0000	[diff] [blame]	31
				32	class DiffRecord(object):
				33	""" Record of differences between two images. """
				34
				35	def __init__(self, storage_root,
				36	expected_image_url, expected_image_locator,
				37	actual_image_url, actual_image_locator):
				38	"""Download this pair of images (unless we already have them on local disk),
				39	and prepare a DiffRecord for them.
				40
				41	TODO(epoger): Make this asynchronously download images, rather than blocking
				42	until the images have been downloaded and processed.
				43
				44	Args:
				45	storage_root: root directory on local disk within which we store all
				46	images
				47	expected_image_url: file or HTTP url from which we will download the
				48	expected image
				49	expected_image_locator: a unique ID string under which we will store the
				50	expected image within storage_root (probably including a checksum to
				51	guarantee uniqueness)
				52	actual_image_url: file or HTTP url from which we will download the
				53	actual image
				54	actual_image_locator: a unique ID string under which we will store the
				55	actual image within storage_root (probably including a checksum to
				56	guarantee uniqueness)
				57	"""
				58	# Download the expected/actual images, if we don't have them already.
				59	expected_image = _download_and_open_image(
				60	os.path.join(storage_root, IMAGES_SUBDIR,
				61	str(expected_image_locator) + IMAGE_SUFFIX),
				62	expected_image_url)
				63	actual_image = _download_and_open_image(
				64	os.path.join(storage_root, IMAGES_SUBDIR,
				65	str(actual_image_locator) + IMAGE_SUFFIX),
				66	actual_image_url)
				67
epoger@google.com	214a024	2013-11-22 19:26:18 +0000	[diff] [blame]	68	# Generate the diff image (absolute diff at each pixel) and
				69	# max_diff_per_channel.
epoger@google.com	9dddf6f	2013-11-08 16:25:25 +0000	[diff] [blame]	70	diff_image = _generate_image_diff(actual_image, expected_image)
epoger@google.com	214a024	2013-11-22 19:26:18 +0000	[diff] [blame]	71	diff_histogram = diff_image.histogram()
				72	(diff_width, diff_height) = diff_image.size
				73	self._weighted_diff_measure = _calculate_weighted_diff_metric(
				74	diff_histogram, diff_width * diff_height)
				75	self._max_diff_per_channel = _max_per_band(diff_histogram)
				76
				77	# Generate the whitediff image (any differing pixels show as white).
				78	# This is tricky, because when you convert color images to grayscale or
				79	# black & white in PIL, it has its own ideas about thresholds.
				80	# We have to force it: if a pixel has any color at all, it's a '1'.
				81	bands = diff_image.split()
				82	graydiff_image = ImageChops.lighter(ImageChops.lighter(
				83	bands[0], bands[1]), bands[2])
				84	whitediff_image = (graydiff_image.point(lambda p: p > 0 and VALUES_PER_BAND)
				85	.convert('1', dither=Image.NONE))
				86
				87	# Final touches on diff_image: use whitediff_image as an alpha mask.
				88	# Unchanged pixels are transparent; differing pixels are opaque.
				89	diff_image.putalpha(whitediff_image)
				90
				91	# Store the diff and whitediff images generated above.
epoger@google.com	9dddf6f	2013-11-08 16:25:25 +0000	[diff] [blame]	92	diff_image_locator = _get_difference_locator(
				93	expected_image_locator=expected_image_locator,
				94	actual_image_locator=actual_image_locator)
epoger@google.com	214a024	2013-11-22 19:26:18 +0000	[diff] [blame]	95	basename = str(diff_image_locator) + IMAGE_SUFFIX
				96	_save_image(diff_image, os.path.join(
				97	storage_root, DIFFS_SUBDIR, basename))
				98	_save_image(whitediff_image, os.path.join(
				99	storage_root, WHITEDIFFS_SUBDIR, basename))
epoger@google.com	9dddf6f	2013-11-08 16:25:25 +0000	[diff] [blame]	100
				101	# Calculate difference metrics.
				102	(self._width, self._height) = diff_image.size
epoger@google.com	214a024	2013-11-22 19:26:18 +0000	[diff] [blame]	103	self._num_pixels_differing = (
				104	whitediff_image.histogram()[VALUES_PER_BAND - 1])
epoger@google.com	9dddf6f	2013-11-08 16:25:25 +0000	[diff] [blame]	105
				106	def get_num_pixels_differing(self):
				107	"""Returns the absolute number of pixels that differ."""
				108	return self._num_pixels_differing
				109
				110	def get_percent_pixels_differing(self):
				111	"""Returns the percentage of pixels that differ, as a float between
				112	0 and 100 (inclusive)."""
				113	return ((float(self._num_pixels_differing) * 100) /
				114	(self._width * self._height))
				115
				116	def get_weighted_diff_measure(self):
				117	"""Returns a weighted measure of image diffs, as a float between 0 and 100
				118	(inclusive)."""
				119	return self._weighted_diff_measure
				120
epoger@google.com	214a024	2013-11-22 19:26:18 +0000	[diff] [blame]	121	def get_max_diff_per_channel(self):
				122	"""Returns the maximum difference between the expected and actual images
				123	for each R/G/B channel, as a list."""
				124	return self._max_diff_per_channel
				125
epoger@google.com	9dddf6f	2013-11-08 16:25:25 +0000	[diff] [blame]	126
				127	class ImageDiffDB(object):
				128	""" Calculates differences between image pairs, maintaining a database of
				129	them for download."""
				130
				131	def __init__(self, storage_root):
				132	"""
				133	Args:
				134	storage_root: string; root path within the DB will store all of its stuff
				135	"""
				136	self._storage_root = storage_root
				137
				138	# Dictionary of DiffRecords, keyed by (expected_image_locator,
				139	# actual_image_locator) tuples.
				140	self._diff_dict = {}
				141
				142	def add_image_pair(self,
				143	expected_image_url, expected_image_locator,
				144	actual_image_url, actual_image_locator):
				145	"""Download this pair of images (unless we already have them on local disk),
				146	and prepare a DiffRecord for them.
				147
				148	TODO(epoger): Make this asynchronously download images, rather than blocking
				149	until the images have been downloaded and processed.
				150	When we do that, we should probably add a new method that will block
				151	until all of the images have been downloaded and processed. Otherwise,
				152	we won't know when it's safe to start calling get_diff_record().
				153	jcgregorio notes: maybe just make ImageDiffDB thread-safe and create a
				154	thread-pool/worker queue at a higher level that just uses ImageDiffDB?
				155
				156	Args:
				157	expected_image_url: file or HTTP url from which we will download the
				158	expected image
				159	expected_image_locator: a unique ID string under which we will store the
				160	expected image within storage_root (probably including a checksum to
				161	guarantee uniqueness)
				162	actual_image_url: file or HTTP url from which we will download the
				163	actual image
				164	actual_image_locator: a unique ID string under which we will store the
				165	actual image within storage_root (probably including a checksum to
				166	guarantee uniqueness)
				167	"""
				168	key = (expected_image_locator, actual_image_locator)
				169	if not key in self._diff_dict:
				170	try:
				171	new_diff_record = DiffRecord(
				172	self._storage_root,
				173	expected_image_url=expected_image_url,
				174	expected_image_locator=expected_image_locator,
				175	actual_image_url=actual_image_url,
				176	actual_image_locator=actual_image_locator)
commit-bot@chromium.org	a47e7ac	2013-12-19 20:01:34 +0000	[diff] [blame^]	177	except Exception:
epoger@google.com	9dddf6f	2013-11-08 16:25:25 +0000	[diff] [blame]	178	logging.exception('got exception while creating new DiffRecord')
				179	return
				180	self._diff_dict[key] = new_diff_record
				181
				182	def get_diff_record(self, expected_image_locator, actual_image_locator):
				183	"""Returns the DiffRecord for this image pair.
				184
				185	Raises a KeyError if we don't have a DiffRecord for this image pair.
				186	"""
				187	key = (expected_image_locator, actual_image_locator)
				188	return self._diff_dict[key]
				189
				190
				191	# Utility functions
				192
def _calculate_weighted_diff_metric(histogram, num_pixels):
  """Given the histogram of a diff image (per-channel diff at each
  pixel between two images), calculate the weighted diff metric (a
  stab at how different the two images really are).

  Args:
    histogram: PIL histogram of a per-channel diff between two images; its
        length must be a multiple of VALUES_PER_BAND
    num_pixels: integer; the total number of pixels in the diff image

  Returns: a weighted diff metric, as a float between 0 and 100 (inclusive).

  Raises:
    AssertionError: if len(histogram) is not a multiple of VALUES_PER_BAND.
    ZeroDivisionError: if num_pixels is 0 or the histogram is empty.
  """
  # TODO(epoger): As a wild guess at an appropriate metric, weight each
  # different pixel by the square of its delta value.  (The more different
  # a pixel is from its expectation, the more we care about it.)
  # In the long term, we will probably use some metric generated by
  # skpdiff anyway.
  assert len(histogram) % VALUES_PER_BAND == 0
  # Floor division keeps num_bands an integer regardless of which division
  # semantics the interpreter applies to '/'.
  num_bands = len(histogram) // VALUES_PER_BAND
  # Worst case: every band of every pixel differs by the maximum delta.
  max_diff = num_pixels * num_bands * (VALUES_PER_BAND - 1) ** 2
  # (index % VALUES_PER_BAND) is the delta value that histogram bucket
  # represents within its band.
  total_diff = sum(count * (index % VALUES_PER_BAND) ** 2
                   for index, count in enumerate(histogram))
  return float(100 * total_diff) / max_diff
				216
				217	def _max_per_band(histogram):
				218	"""Given the histogram of an image, return the maximum value of each band
				219	(a.k.a. "color channel", such as R/G/B) across the entire image.
				220
				221	Args:
				222	histogram: PIL histogram
				223
				224	Returns the maximum value of each band within the image histogram, as a list.
				225	"""
				226	max_per_band = []
				227	assert(len(histogram) % VALUES_PER_BAND == 0)
				228	num_bands = len(histogram) / VALUES_PER_BAND
				229	for band in xrange(num_bands):
				230	# Assuming that VALUES_PER_BAND is 256...
				231	# the 'R' band makes up indices 0-255 in the histogram,
				232	# the 'G' band makes up indices 256-511 in the histogram,
				233	# etc.
				234	min_index = band * VALUES_PER_BAND
				235	index = min_index + VALUES_PER_BAND
				236	while index > min_index:
				237	index -= 1
				238	if histogram[index] > 0:
				239	max_per_band.append(index - min_index)
				240	break
				241	return max_per_band
epoger@google.com	9dddf6f	2013-11-08 16:25:25 +0000	[diff] [blame]	242
				243	def _generate_image_diff(image1, image2):
				244	"""Wrapper for ImageChops.difference(image1, image2) that will handle some
				245	errors automatically, or at least yield more useful error messages.
				246
				247	TODO(epoger): Currently, some of the images generated by the bots are RGBA
				248	and others are RGB. I'm not sure why that is. For now, to avoid confusion
				249	within the UI, convert all to RGB when diffing.
				250
				251	Args:
				252	image1: a PIL image object
				253	image2: a PIL image object
				254
				255	Returns: per-pixel diffs between image1 and image2, as a PIL image object
				256	"""
				257	try:
				258	return ImageChops.difference(image1.convert('RGB'), image2.convert('RGB'))
				259	except ValueError:
				260	logging.error('Error diffing image1 [%s] and image2 [%s].' % (
				261	repr(image1), repr(image2)))
				262	raise
				263
				264	def _download_and_open_image(local_filepath, url):
				265	"""Open the image at local_filepath; if there is no file at that path,
				266	download it from url to that path and then open it.
				267
				268	Args:
				269	local_filepath: path on local disk where the image should be stored
				270	url: URL from which we can download the image if we don't have it yet
				271
				272	Returns: a PIL image object
				273	"""
				274	if not os.path.exists(local_filepath):
				275	_mkdir_unless_exists(os.path.dirname(local_filepath))
				276	with contextlib.closing(urllib.urlopen(url)) as url_handle:
				277	with open(local_filepath, 'wb') as file_handle:
				278	shutil.copyfileobj(fsrc=url_handle, fdst=file_handle)
				279	return _open_image(local_filepath)
				280
				281	def _open_image(filepath):
				282	"""Wrapper for Image.open(filepath) that yields more useful error messages.
				283
				284	Args:
				285	filepath: path on local disk to load image from
				286
				287	Returns: a PIL image object
				288	"""
				289	try:
				290	return Image.open(filepath)
				291	except IOError:
				292	logging.error('IOError loading image file %s' % filepath)
				293	raise
				294
def _save_image(image, filepath, format='PNG'):
  """Save an image to filepath, first creating the parent directory (and any
  missing intermediate directories) if necessary.

  Args:
    image: a PIL image object
    filepath: path on local disk to write image to
    format: one of the PIL image formats, listed at
        http://effbot.org/imagingbook/formats.htm
  """
  parent_dir = os.path.dirname(filepath)
  _mkdir_unless_exists(parent_dir)
  image.save(filepath, format)
				306
epoger@google.com	9dddf6f	2013-11-08 16:25:25 +0000	[diff] [blame]	307	def _mkdir_unless_exists(path):
				308	"""Unless path refers to an already-existing directory, create it.
				309
				310	Args:
				311	path: path on local disk
				312	"""
				313	if not os.path.isdir(path):
				314	os.makedirs(path)
				315
				316	def _get_difference_locator(expected_image_locator, actual_image_locator):
				317	"""Returns the locator string used to look up the diffs between expected_image
				318	and actual_image.
				319
				320	Args:
				321	expected_image_locator: locator string pointing at expected image
				322	actual_image_locator: locator string pointing at actual image
				323
				324	Returns: locator where the diffs between expected and actual images can be
				325	found
				326	"""
				327	return "%s-vs-%s" % (expected_image_locator, actual_image_locator)