Blame - gm/rebaseline_server/imagediffdb.py - fp2-dev/platform/external/skia

blob: 3a2ce63b954f6244454e76ddf085e4fca66ef643 [file] [log] [blame]

epoger@google.com	9dddf6f	2013-11-08 16:25:25 +0000	[diff] [blame]	1	#!/usr/bin/python
				2
				3	"""
				4	Copyright 2013 Google Inc.
				5
				6	Use of this source code is governed by a BSD-style license that can be
				7	found in the LICENSE file.
				8
				9	Calculate differences between image pairs, and store them in a database.
				10	"""
				11
				12	import contextlib
				13	import logging
				14	import os
				15	import shutil
				16	import urllib
				17	try:
				18	from PIL import Image, ImageChops
				19	except ImportError:
				20	raise ImportError('Requires PIL to be installed; see '
				21	+ 'http://www.pythonware.com/products/pil/')
				22
# Defaults for DiffRecord's optional arguments: the suffix appended to an image
# locator to form its filename, and the subdirectory of storage_root in which
# expected/actual images are kept.
DEFAULT_IMAGE_SUFFIX = '.png'
DEFAULT_IMAGES_SUBDIR = 'images'

# Subdirectories of storage_root into which DiffRecord writes the diff and
# whitediff images it generates.
DIFFS_SUBDIR = 'diffs'
WHITEDIFFS_SUBDIR = 'whitediffs'

# Number of histogram entries per band (color channel); the histogram helpers
# below use it to split a flat histogram into per-band ranges.
VALUES_PER_BAND = 256
				30
epoger@google.com	9dddf6f	2013-11-08 16:25:25 +0000	[diff] [blame]	31
				32	class DiffRecord(object):
				33	""" Record of differences between two images. """
				34
				35	def __init__(self, storage_root,
				36	expected_image_url, expected_image_locator,
rmistry@google.com	5861e52	2013-12-21 19:07:40 +0000	[diff] [blame]	37	actual_image_url, actual_image_locator,
				38	expected_images_subdir=DEFAULT_IMAGES_SUBDIR,
				39	actual_images_subdir=DEFAULT_IMAGES_SUBDIR,
				40	image_suffix=DEFAULT_IMAGE_SUFFIX):
epoger@google.com	9dddf6f	2013-11-08 16:25:25 +0000	[diff] [blame]	41	"""Download this pair of images (unless we already have them on local disk),
				42	and prepare a DiffRecord for them.
				43
				44	TODO(epoger): Make this asynchronously download images, rather than blocking
				45	until the images have been downloaded and processed.
				46
				47	Args:
				48	storage_root: root directory on local disk within which we store all
				49	images
				50	expected_image_url: file or HTTP url from which we will download the
				51	expected image
				52	expected_image_locator: a unique ID string under which we will store the
				53	expected image within storage_root (probably including a checksum to
				54	guarantee uniqueness)
				55	actual_image_url: file or HTTP url from which we will download the
				56	actual image
				57	actual_image_locator: a unique ID string under which we will store the
				58	actual image within storage_root (probably including a checksum to
				59	guarantee uniqueness)
rmistry@google.com	5861e52	2013-12-21 19:07:40 +0000	[diff] [blame]	60	expected_images_subdir: the subdirectory expected images are stored in.
				61	actual_images_subdir: the subdirectory actual images are stored in.
				62	image_suffix: the suffix of images.
epoger@google.com	9dddf6f	2013-11-08 16:25:25 +0000	[diff] [blame]	63	"""
				64	# Download the expected/actual images, if we don't have them already.
rmistry@google.com	5861e52	2013-12-21 19:07:40 +0000	[diff] [blame]	65	# TODO(rmistry): Add a parameter that makes _download_and_open_image raise
				66	# an exception if images are not found locally (instead of trying to
				67	# download them).
epoger@google.com	9dddf6f	2013-11-08 16:25:25 +0000	[diff] [blame]	68	expected_image = _download_and_open_image(
rmistry@google.com	5861e52	2013-12-21 19:07:40 +0000	[diff] [blame]	69	os.path.join(storage_root, expected_images_subdir,
				70	str(expected_image_locator) + image_suffix),
epoger@google.com	9dddf6f	2013-11-08 16:25:25 +0000	[diff] [blame]	71	expected_image_url)
				72	actual_image = _download_and_open_image(
rmistry@google.com	5861e52	2013-12-21 19:07:40 +0000	[diff] [blame]	73	os.path.join(storage_root, actual_images_subdir,
				74	str(actual_image_locator) + image_suffix),
epoger@google.com	9dddf6f	2013-11-08 16:25:25 +0000	[diff] [blame]	75	actual_image_url)
				76
epoger@google.com	214a024	2013-11-22 19:26:18 +0000	[diff] [blame]	77	# Generate the diff image (absolute diff at each pixel) and
				78	# max_diff_per_channel.
epoger@google.com	9dddf6f	2013-11-08 16:25:25 +0000	[diff] [blame]	79	diff_image = _generate_image_diff(actual_image, expected_image)
epoger@google.com	214a024	2013-11-22 19:26:18 +0000	[diff] [blame]	80	diff_histogram = diff_image.histogram()
				81	(diff_width, diff_height) = diff_image.size
				82	self._weighted_diff_measure = _calculate_weighted_diff_metric(
				83	diff_histogram, diff_width * diff_height)
				84	self._max_diff_per_channel = _max_per_band(diff_histogram)
				85
				86	# Generate the whitediff image (any differing pixels show as white).
				87	# This is tricky, because when you convert color images to grayscale or
				88	# black & white in PIL, it has its own ideas about thresholds.
				89	# We have to force it: if a pixel has any color at all, it's a '1'.
				90	bands = diff_image.split()
				91	graydiff_image = ImageChops.lighter(ImageChops.lighter(
				92	bands[0], bands[1]), bands[2])
				93	whitediff_image = (graydiff_image.point(lambda p: p > 0 and VALUES_PER_BAND)
				94	.convert('1', dither=Image.NONE))
				95
				96	# Final touches on diff_image: use whitediff_image as an alpha mask.
				97	# Unchanged pixels are transparent; differing pixels are opaque.
				98	diff_image.putalpha(whitediff_image)
				99
				100	# Store the diff and whitediff images generated above.
epoger@google.com	9dddf6f	2013-11-08 16:25:25 +0000	[diff] [blame]	101	diff_image_locator = _get_difference_locator(
				102	expected_image_locator=expected_image_locator,
				103	actual_image_locator=actual_image_locator)
rmistry@google.com	5861e52	2013-12-21 19:07:40 +0000	[diff] [blame]	104	basename = str(diff_image_locator) + image_suffix
epoger@google.com	214a024	2013-11-22 19:26:18 +0000	[diff] [blame]	105	_save_image(diff_image, os.path.join(
				106	storage_root, DIFFS_SUBDIR, basename))
				107	_save_image(whitediff_image, os.path.join(
				108	storage_root, WHITEDIFFS_SUBDIR, basename))
epoger@google.com	9dddf6f	2013-11-08 16:25:25 +0000	[diff] [blame]	109
				110	# Calculate difference metrics.
				111	(self._width, self._height) = diff_image.size
epoger@google.com	214a024	2013-11-22 19:26:18 +0000	[diff] [blame]	112	self._num_pixels_differing = (
				113	whitediff_image.histogram()[VALUES_PER_BAND - 1])
epoger@google.com	9dddf6f	2013-11-08 16:25:25 +0000	[diff] [blame]	114
				115	def get_num_pixels_differing(self):
				116	"""Returns the absolute number of pixels that differ."""
				117	return self._num_pixels_differing
				118
				119	def get_percent_pixels_differing(self):
				120	"""Returns the percentage of pixels that differ, as a float between
				121	0 and 100 (inclusive)."""
				122	return ((float(self._num_pixels_differing) * 100) /
				123	(self._width * self._height))
				124
				125	def get_weighted_diff_measure(self):
				126	"""Returns a weighted measure of image diffs, as a float between 0 and 100
				127	(inclusive)."""
				128	return self._weighted_diff_measure
				129
epoger@google.com	214a024	2013-11-22 19:26:18 +0000	[diff] [blame]	130	def get_max_diff_per_channel(self):
				131	"""Returns the maximum difference between the expected and actual images
				132	for each R/G/B channel, as a list."""
				133	return self._max_diff_per_channel
				134
epoger@google.com	9dddf6f	2013-11-08 16:25:25 +0000	[diff] [blame]	135
				136	class ImageDiffDB(object):
				137	""" Calculates differences between image pairs, maintaining a database of
				138	them for download."""
				139
				140	def __init__(self, storage_root):
				141	"""
				142	Args:
				143	storage_root: string; root path within the DB will store all of its stuff
				144	"""
				145	self._storage_root = storage_root
				146
				147	# Dictionary of DiffRecords, keyed by (expected_image_locator,
				148	# actual_image_locator) tuples.
				149	self._diff_dict = {}
				150
				151	def add_image_pair(self,
				152	expected_image_url, expected_image_locator,
				153	actual_image_url, actual_image_locator):
				154	"""Download this pair of images (unless we already have them on local disk),
				155	and prepare a DiffRecord for them.
				156
				157	TODO(epoger): Make this asynchronously download images, rather than blocking
				158	until the images have been downloaded and processed.
				159	When we do that, we should probably add a new method that will block
				160	until all of the images have been downloaded and processed. Otherwise,
				161	we won't know when it's safe to start calling get_diff_record().
				162	jcgregorio notes: maybe just make ImageDiffDB thread-safe and create a
				163	thread-pool/worker queue at a higher level that just uses ImageDiffDB?
				164
				165	Args:
				166	expected_image_url: file or HTTP url from which we will download the
				167	expected image
				168	expected_image_locator: a unique ID string under which we will store the
				169	expected image within storage_root (probably including a checksum to
				170	guarantee uniqueness)
				171	actual_image_url: file or HTTP url from which we will download the
				172	actual image
				173	actual_image_locator: a unique ID string under which we will store the
				174	actual image within storage_root (probably including a checksum to
				175	guarantee uniqueness)
				176	"""
				177	key = (expected_image_locator, actual_image_locator)
				178	if not key in self._diff_dict:
				179	try:
				180	new_diff_record = DiffRecord(
				181	self._storage_root,
				182	expected_image_url=expected_image_url,
				183	expected_image_locator=expected_image_locator,
				184	actual_image_url=actual_image_url,
				185	actual_image_locator=actual_image_locator)
commit-bot@chromium.org	a47e7ac	2013-12-19 20:01:34 +0000	[diff] [blame]	186	except Exception:
epoger@google.com	9dddf6f	2013-11-08 16:25:25 +0000	[diff] [blame]	187	logging.exception('got exception while creating new DiffRecord')
				188	return
				189	self._diff_dict[key] = new_diff_record
				190
				191	def get_diff_record(self, expected_image_locator, actual_image_locator):
				192	"""Returns the DiffRecord for this image pair.
				193
				194	Raises a KeyError if we don't have a DiffRecord for this image pair.
				195	"""
				196	key = (expected_image_locator, actual_image_locator)
				197	return self._diff_dict[key]
				198
				199
				200	# Utility functions
				201
def _calculate_weighted_diff_metric(histogram, num_pixels):
    """Given the histogram of a diff image (per-channel diff at each pixel
    between two images), calculate the weighted diff metric (a stab at how
    different the two images really are).

    Args:
      histogram: PIL histogram of a per-channel diff between two images; its
          length must be a multiple of VALUES_PER_BAND
      num_pixels: integer; the total number of pixels in the diff image

    Returns: a weighted diff metric, as a float between 0 and 100 (inclusive).

    Raises:
      ZeroDivisionError: if num_pixels is 0 (or the histogram is empty), since
          the maximum possible diff is then 0.
    """
    # TODO(epoger): As a wild guess at an appropriate metric, weight each
    # different pixel by the square of its delta value. (The more different
    # a pixel is from its expectation, the more we care about it.)
    # In the long term, we will probably use some metric generated by
    # skpdiff anyway.
    assert len(histogram) % VALUES_PER_BAND == 0
    # Floor division keeps the band count an integer regardless of whether
    # true division is in effect.
    num_bands = len(histogram) // VALUES_PER_BAND
    # Worst case: every band of every pixel differs by the largest delta.
    max_diff = num_pixels * num_bands * (VALUES_PER_BAND - 1)**2
    # index % VALUES_PER_BAND is the delta value that histogram slot counts,
    # whichever band the slot belongs to.
    total_diff = sum(
        count * (index % VALUES_PER_BAND)**2
        for index, count in enumerate(histogram))
    return float(100 * total_diff) / max_diff
				225
				226	def _max_per_band(histogram):
				227	"""Given the histogram of an image, return the maximum value of each band
				228	(a.k.a. "color channel", such as R/G/B) across the entire image.
				229
				230	Args:
				231	histogram: PIL histogram
				232
				233	Returns the maximum value of each band within the image histogram, as a list.
				234	"""
				235	max_per_band = []
				236	assert(len(histogram) % VALUES_PER_BAND == 0)
				237	num_bands = len(histogram) / VALUES_PER_BAND
				238	for band in xrange(num_bands):
				239	# Assuming that VALUES_PER_BAND is 256...
				240	# the 'R' band makes up indices 0-255 in the histogram,
				241	# the 'G' band makes up indices 256-511 in the histogram,
				242	# etc.
				243	min_index = band * VALUES_PER_BAND
				244	index = min_index + VALUES_PER_BAND
				245	while index > min_index:
				246	index -= 1
				247	if histogram[index] > 0:
				248	max_per_band.append(index - min_index)
				249	break
				250	return max_per_band
epoger@google.com	9dddf6f	2013-11-08 16:25:25 +0000	[diff] [blame]	251
				252	def _generate_image_diff(image1, image2):
				253	"""Wrapper for ImageChops.difference(image1, image2) that will handle some
				254	errors automatically, or at least yield more useful error messages.
				255
				256	TODO(epoger): Currently, some of the images generated by the bots are RGBA
				257	and others are RGB. I'm not sure why that is. For now, to avoid confusion
				258	within the UI, convert all to RGB when diffing.
				259
				260	Args:
				261	image1: a PIL image object
				262	image2: a PIL image object
				263
				264	Returns: per-pixel diffs between image1 and image2, as a PIL image object
				265	"""
				266	try:
				267	return ImageChops.difference(image1.convert('RGB'), image2.convert('RGB'))
				268	except ValueError:
				269	logging.error('Error diffing image1 [%s] and image2 [%s].' % (
				270	repr(image1), repr(image2)))
				271	raise
				272
				273	def _download_and_open_image(local_filepath, url):
				274	"""Open the image at local_filepath; if there is no file at that path,
				275	download it from url to that path and then open it.
				276
				277	Args:
				278	local_filepath: path on local disk where the image should be stored
				279	url: URL from which we can download the image if we don't have it yet
				280
				281	Returns: a PIL image object
				282	"""
				283	if not os.path.exists(local_filepath):
				284	_mkdir_unless_exists(os.path.dirname(local_filepath))
				285	with contextlib.closing(urllib.urlopen(url)) as url_handle:
				286	with open(local_filepath, 'wb') as file_handle:
				287	shutil.copyfileobj(fsrc=url_handle, fdst=file_handle)
				288	return _open_image(local_filepath)
				289
				290	def _open_image(filepath):
				291	"""Wrapper for Image.open(filepath) that yields more useful error messages.
				292
				293	Args:
				294	filepath: path on local disk to load image from
				295
				296	Returns: a PIL image object
				297	"""
				298	try:
				299	return Image.open(filepath)
				300	except IOError:
				301	logging.error('IOError loading image file %s' % filepath)
				302	raise
				303
def _save_image(image, filepath, format='PNG'):
    """Save an image to filepath, first creating the directory that will hold
    it if that directory does not exist yet.

    Args:
      image: a PIL image object
      filepath: path on local disk to write image to
      format: one of the PIL image formats, listed at
          http://effbot.org/imagingbook/formats.htm
    """
    parent_dir = os.path.dirname(filepath)
    _mkdir_unless_exists(parent_dir)
    image.save(filepath, format)
				315
epoger@google.com	9dddf6f	2013-11-08 16:25:25 +0000	[diff] [blame]	316	def _mkdir_unless_exists(path):
				317	"""Unless path refers to an already-existing directory, create it.
				318
				319	Args:
				320	path: path on local disk
				321	"""
				322	if not os.path.isdir(path):
				323	os.makedirs(path)
				324
				325	def _get_difference_locator(expected_image_locator, actual_image_locator):
				326	"""Returns the locator string used to look up the diffs between expected_image
				327	and actual_image.
				328
				329	Args:
				330	expected_image_locator: locator string pointing at expected image
				331	actual_image_locator: locator string pointing at actual image
				332
				333	Returns: locator where the diffs between expected and actual images can be
				334	found
				335	"""
				336	return "%s-vs-%s" % (expected_image_locator, actual_image_locator)