Blame - gm/rebaseline_server/imagediffdb.py - fp2-dev/platform/external/skia

blob: 69d282f1d94a2071df7c8a9ef6d42d9fc6cf5776 [file] [log] [blame]

epoger@google.com	9dddf6f	2013-11-08 16:25:25 +0000	[diff] [blame^]	1	#!/usr/bin/python
				2
				3	"""
				4	Copyright 2013 Google Inc.
				5
				6	Use of this source code is governed by a BSD-style license that can be
				7	found in the LICENSE file.
				8
				9	Calculate differences between image pairs, and store them in a database.
				10	"""
				11
				12	import contextlib
				13	import logging
				14	import os
				15	import shutil
				16	import urllib
				17	try:
				18	from PIL import Image, ImageChops
				19	except ImportError:
				20	raise ImportError('Requires PIL to be installed; see '
				21	+ 'http://www.pythonware.com/products/pil/')
				22
# Filename suffix appended to every locator when building an image path.
IMAGE_SUFFIX = '.png'
# Format name passed to PIL's Image.save(); must match one of the PIL image
# formats, listed at http://effbot.org/imagingbook/formats.htm
IMAGE_FORMAT = 'PNG'

# Subdirectories of storage_root: downloaded expected/actual images, per-pixel
# diff images, and whitediff images (differing pixels shown as white).
IMAGES_SUBDIR = 'images'
DIFFS_SUBDIR = 'diffs'
WHITEDIFFS_SUBDIR = 'whitediffs'
				30
				31
				32	class DiffRecord(object):
				33	""" Record of differences between two images. """
				34
				35	def __init__(self, storage_root,
				36	expected_image_url, expected_image_locator,
				37	actual_image_url, actual_image_locator):
				38	"""Download this pair of images (unless we already have them on local disk),
				39	and prepare a DiffRecord for them.
				40
				41	TODO(epoger): Make this asynchronously download images, rather than blocking
				42	until the images have been downloaded and processed.
				43
				44	Args:
				45	storage_root: root directory on local disk within which we store all
				46	images
				47	expected_image_url: file or HTTP url from which we will download the
				48	expected image
				49	expected_image_locator: a unique ID string under which we will store the
				50	expected image within storage_root (probably including a checksum to
				51	guarantee uniqueness)
				52	actual_image_url: file or HTTP url from which we will download the
				53	actual image
				54	actual_image_locator: a unique ID string under which we will store the
				55	actual image within storage_root (probably including a checksum to
				56	guarantee uniqueness)
				57	"""
				58	# Download the expected/actual images, if we don't have them already.
				59	expected_image = _download_and_open_image(
				60	os.path.join(storage_root, IMAGES_SUBDIR,
				61	str(expected_image_locator) + IMAGE_SUFFIX),
				62	expected_image_url)
				63	actual_image = _download_and_open_image(
				64	os.path.join(storage_root, IMAGES_SUBDIR,
				65	str(actual_image_locator) + IMAGE_SUFFIX),
				66	actual_image_url)
				67
				68	# Store the diff image (absolute diff at each pixel).
				69	diff_image = _generate_image_diff(actual_image, expected_image)
				70	self._weighted_diff_measure = _calculate_weighted_diff_metric(diff_image)
				71	diff_image_locator = _get_difference_locator(
				72	expected_image_locator=expected_image_locator,
				73	actual_image_locator=actual_image_locator)
				74	diff_image_filepath = os.path.join(
				75	storage_root, DIFFS_SUBDIR, str(diff_image_locator) + IMAGE_SUFFIX)
				76	_mkdir_unless_exists(os.path.join(storage_root, DIFFS_SUBDIR))
				77	diff_image.save(diff_image_filepath, IMAGE_FORMAT)
				78
				79	# Store the whitediff image (any differing pixels show as white).
				80	#
				81	# TODO(epoger): From http://effbot.org/imagingbook/image.htm , it seems
				82	# like we should be able to use im.point(function, mode) to perform both
				83	# the point() and convert('1') operations simultaneously, but I couldn't
				84	# get it to work.
				85	whitediff_image = (diff_image.point(lambda p: (0, 256)[p!=0])
				86	.convert('1'))
				87	whitediff_image_filepath = os.path.join(
				88	storage_root, WHITEDIFFS_SUBDIR, str(diff_image_locator) + IMAGE_SUFFIX)
				89	_mkdir_unless_exists(os.path.join(storage_root, WHITEDIFFS_SUBDIR))
				90	whitediff_image.save(whitediff_image_filepath, IMAGE_FORMAT)
				91
				92	# Calculate difference metrics.
				93	(self._width, self._height) = diff_image.size
				94	self._num_pixels_differing = whitediff_image.histogram()[255]
				95
				96	def get_num_pixels_differing(self):
				97	"""Returns the absolute number of pixels that differ."""
				98	return self._num_pixels_differing
				99
				100	def get_percent_pixels_differing(self):
				101	"""Returns the percentage of pixels that differ, as a float between
				102	0 and 100 (inclusive)."""
				103	return ((float(self._num_pixels_differing) * 100) /
				104	(self._width * self._height))
				105
				106	def get_weighted_diff_measure(self):
				107	"""Returns a weighted measure of image diffs, as a float between 0 and 100
				108	(inclusive)."""
				109	return self._weighted_diff_measure
				110
				111
				112	class ImageDiffDB(object):
				113	""" Calculates differences between image pairs, maintaining a database of
				114	them for download."""
				115
				116	def __init__(self, storage_root):
				117	"""
				118	Args:
				119	storage_root: string; root path within the DB will store all of its stuff
				120	"""
				121	self._storage_root = storage_root
				122
				123	# Dictionary of DiffRecords, keyed by (expected_image_locator,
				124	# actual_image_locator) tuples.
				125	self._diff_dict = {}
				126
				127	def add_image_pair(self,
				128	expected_image_url, expected_image_locator,
				129	actual_image_url, actual_image_locator):
				130	"""Download this pair of images (unless we already have them on local disk),
				131	and prepare a DiffRecord for them.
				132
				133	TODO(epoger): Make this asynchronously download images, rather than blocking
				134	until the images have been downloaded and processed.
				135	When we do that, we should probably add a new method that will block
				136	until all of the images have been downloaded and processed. Otherwise,
				137	we won't know when it's safe to start calling get_diff_record().
				138	jcgregorio notes: maybe just make ImageDiffDB thread-safe and create a
				139	thread-pool/worker queue at a higher level that just uses ImageDiffDB?
				140
				141	Args:
				142	expected_image_url: file or HTTP url from which we will download the
				143	expected image
				144	expected_image_locator: a unique ID string under which we will store the
				145	expected image within storage_root (probably including a checksum to
				146	guarantee uniqueness)
				147	actual_image_url: file or HTTP url from which we will download the
				148	actual image
				149	actual_image_locator: a unique ID string under which we will store the
				150	actual image within storage_root (probably including a checksum to
				151	guarantee uniqueness)
				152	"""
				153	key = (expected_image_locator, actual_image_locator)
				154	if not key in self._diff_dict:
				155	try:
				156	new_diff_record = DiffRecord(
				157	self._storage_root,
				158	expected_image_url=expected_image_url,
				159	expected_image_locator=expected_image_locator,
				160	actual_image_url=actual_image_url,
				161	actual_image_locator=actual_image_locator)
				162	except:
				163	logging.exception('got exception while creating new DiffRecord')
				164	return
				165	self._diff_dict[key] = new_diff_record
				166
				167	def get_diff_record(self, expected_image_locator, actual_image_locator):
				168	"""Returns the DiffRecord for this image pair.
				169
				170	Raises a KeyError if we don't have a DiffRecord for this image pair.
				171	"""
				172	key = (expected_image_locator, actual_image_locator)
				173	return self._diff_dict[key]
				174
				175
				176	# Utility functions
				177
				178	def _calculate_weighted_diff_metric(image):
				179	"""Given a diff image (per-channel diff at each pixel between two images),
				180	calculate the weighted diff metric (a stab at how different the two images
				181	really are).
				182
				183	Args:
				184	image: PIL image; a per-channel diff between two images
				185
				186	Returns: a weighted diff metric, as a float between 0 and 100 (inclusive).
				187	"""
				188	# TODO(epoger): This is just a wild guess at an appropriate metric.
				189	# In the long term, we will probably use some metric generated by
				190	# skpdiff anyway.
				191	(width, height) = image.size
				192	maxdiff = 3 * (width * height) * 255**2
				193	h = image.histogram()
				194	assert(len(h) % 256 == 0)
				195	totaldiff = sum(map(lambda index,value: value * (index%256)**2,
				196	range(len(h)), h))
				197	return float(100 * totaldiff) / maxdiff
				198
				199	def _generate_image_diff(image1, image2):
				200	"""Wrapper for ImageChops.difference(image1, image2) that will handle some
				201	errors automatically, or at least yield more useful error messages.
				202
				203	TODO(epoger): Currently, some of the images generated by the bots are RGBA
				204	and others are RGB. I'm not sure why that is. For now, to avoid confusion
				205	within the UI, convert all to RGB when diffing.
				206
				207	Args:
				208	image1: a PIL image object
				209	image2: a PIL image object
				210
				211	Returns: per-pixel diffs between image1 and image2, as a PIL image object
				212	"""
				213	try:
				214	return ImageChops.difference(image1.convert('RGB'), image2.convert('RGB'))
				215	except ValueError:
				216	logging.error('Error diffing image1 [%s] and image2 [%s].' % (
				217	repr(image1), repr(image2)))
				218	raise
				219
				220	def _download_and_open_image(local_filepath, url):
				221	"""Open the image at local_filepath; if there is no file at that path,
				222	download it from url to that path and then open it.
				223
				224	Args:
				225	local_filepath: path on local disk where the image should be stored
				226	url: URL from which we can download the image if we don't have it yet
				227
				228	Returns: a PIL image object
				229	"""
				230	if not os.path.exists(local_filepath):
				231	_mkdir_unless_exists(os.path.dirname(local_filepath))
				232	with contextlib.closing(urllib.urlopen(url)) as url_handle:
				233	with open(local_filepath, 'wb') as file_handle:
				234	shutil.copyfileobj(fsrc=url_handle, fdst=file_handle)
				235	return _open_image(local_filepath)
				236
				237	def _open_image(filepath):
				238	"""Wrapper for Image.open(filepath) that yields more useful error messages.
				239
				240	Args:
				241	filepath: path on local disk to load image from
				242
				243	Returns: a PIL image object
				244	"""
				245	try:
				246	return Image.open(filepath)
				247	except IOError:
				248	logging.error('IOError loading image file %s' % filepath)
				249	raise
				250
				251	def _mkdir_unless_exists(path):
				252	"""Unless path refers to an already-existing directory, create it.
				253
				254	Args:
				255	path: path on local disk
				256	"""
				257	if not os.path.isdir(path):
				258	os.makedirs(path)
				259
				260	def _get_difference_locator(expected_image_locator, actual_image_locator):
				261	"""Returns the locator string used to look up the diffs between expected_image
				262	and actual_image.
				263
				264	Args:
				265	expected_image_locator: locator string pointing at expected image
				266	actual_image_locator: locator string pointing at actual image
				267
				268	Returns: locator where the diffs between expected and actual images can be
				269	found
				270	"""
				271	return "%s-vs-%s" % (expected_image_locator, actual_image_locator)