Blame - bench/bench_util.py - platform/external/skqp

blob: b6fecb7ca801c62c6e11dae12b053994d54602bc [file] [log] [blame]

Mike Klein	e530eb3	2014-08-08 17:28:53 -0400	[diff] [blame]	1	'''
				2	Created on May 19, 2011
				3
				4	@author: bungeman
				5	'''
				6
				7	import os
				8	import re
				9	import math
				10
				11	# bench representation algorithm constant names
				12	ALGORITHM_AVERAGE = 'avg'
				13	ALGORITHM_MEDIAN = 'med'
				14	ALGORITHM_MINIMUM = 'min'
				15	ALGORITHM_25TH_PERCENTILE = '25th'
				16
				17	# Regular expressions used throughout.
				18	PER_SETTING_RE = '([^\s=]+)(?:=(\S+))?'
				19	SETTINGS_RE = 'skia bench:((?:\s+' + PER_SETTING_RE + ')*)'
				20	BENCH_RE = 'running bench (?:\[\d+ \d+\] )?\s*(\S+)'
				21	TIME_RE = '(?:(\w)msecs = )?\s((?:\d+\.\d+)(?:,\s\d+\.\d+))'
				22	# non-per-tile benches have configs that don't end with ']' or '>'
				23	CONFIG_RE = '(\S+[^\]>]):\s+((?:' + TIME_RE + '\s+)+)'
				24	# per-tile bench lines are in the following format. Note that there are
				25	# non-averaged bench numbers in separate lines, which we ignore now due to
				26	# their inaccuracy.
				27	TILE_RE = (' tile_(\S+): tile \[\d+,\d+\] out of \[\d+,\d+\] <averaged>:'
				28	' ((?:' + TIME_RE + '\s+)+)')
				29	# for extracting tile layout
				30	TILE_LAYOUT_RE = ' out of \[(\d+),(\d+)\] <averaged>: '
				31
				32	PER_SETTING_RE_COMPILED = re.compile(PER_SETTING_RE)
				33	SETTINGS_RE_COMPILED = re.compile(SETTINGS_RE)
				34	BENCH_RE_COMPILED = re.compile(BENCH_RE)
				35	TIME_RE_COMPILED = re.compile(TIME_RE)
				36	CONFIG_RE_COMPILED = re.compile(CONFIG_RE)
				37	TILE_RE_COMPILED = re.compile(TILE_RE)
				38	TILE_LAYOUT_RE_COMPILED = re.compile(TILE_LAYOUT_RE)
				39
				40	class BenchDataPoint:
				41	"""A single data point produced by bench.
				42	"""
				43	def __init__(self, bench, config, time_type, time, settings,
				44	tile_layout='', per_tile_values=[], per_iter_time=[]):
				45	# string name of the benchmark to measure
				46	self.bench = bench
				47	# string name of the configurations to run
				48	self.config = config
				49	# type of the timer in string: '' (walltime), 'c' (cpu) or 'g' (gpu)
				50	self.time_type = time_type
				51	# float number of the bench time value
				52	self.time = time
				53	# dictionary of the run settings
				54	self.settings = settings
				55	# how tiles cover the whole picture: '5x3' means 5 columns and 3 rows
				56	self.tile_layout = tile_layout
				57	# list of float for per_tile bench values, if applicable
				58	self.per_tile_values = per_tile_values
				59	# list of float for per-iteration bench time, if applicable
				60	self.per_iter_time = per_iter_time
				61
				62	def __repr__(self):
				63	return "BenchDataPoint(%s, %s, %s, %s, %s)" % (
				64	str(self.bench),
				65	str(self.config),
				66	str(self.time_type),
				67	str(self.time),
				68	str(self.settings),
				69	)
				70
				71	class _ExtremeType(object):
				72	"""Instances of this class compare greater or less than other objects."""
				73	def __init__(self, cmpr, rep):
				74	object.__init__(self)
				75	self._cmpr = cmpr
				76	self._rep = rep
				77
				78	def __cmp__(self, other):
				79	if isinstance(other, self.__class__) and other._cmpr == self._cmpr:
				80	return 0
				81	return self._cmpr
				82
				83	def __repr__(self):
				84	return self._rep
				85
				86	Max = _ExtremeType(1, "Max")
				87	Min = _ExtremeType(-1, "Min")
				88
				89	class _ListAlgorithm(object):
				90	"""Algorithm for selecting the representation value from a given list.
				91	representation is one of the ALGORITHM_XXX representation types."""
				92	def __init__(self, data, representation=None):
				93	if not representation:
				94	representation = ALGORITHM_AVERAGE # default algorithm
				95	self._data = data
				96	self._len = len(data)
				97	if representation == ALGORITHM_AVERAGE:
				98	self._rep = sum(self._data) / self._len
				99	else:
				100	self._data.sort()
				101	if representation == ALGORITHM_MINIMUM:
				102	self._rep = self._data[0]
				103	else:
				104	# for percentiles, we use the value below which x% of values are
				105	# found, which allows for better detection of quantum behaviors.
				106	if representation == ALGORITHM_MEDIAN:
				107	x = int(round(0.5 * self._len + 0.5))
				108	elif representation == ALGORITHM_25TH_PERCENTILE:
				109	x = int(round(0.25 * self._len + 0.5))
				110	else:
				111	raise Exception("invalid representation algorithm %s!" %
				112	representation)
				113	self._rep = self._data[x - 1]
				114
				115	def compute(self):
				116	return self._rep
				117
				118	def _ParseAndStoreTimes(config_re_compiled, is_per_tile, line, bench,
				119	value_dic, layout_dic):
				120	"""Parses given bench time line with regex and adds data to value_dic.
				121
				122	config_re_compiled: precompiled regular expression for parsing the config
				123	line.
				124	is_per_tile: boolean indicating whether this is a per-tile bench.
				125	If so, we add tile layout into layout_dic as well.
				126	line: input string line to parse.
				127	bench: name of bench for the time values.
				128	value_dic: dictionary to store bench values. See bench_dic in parse() below.
				129	layout_dic: dictionary to store tile layouts. See parse() for descriptions.
				130	"""
				131
				132	for config in config_re_compiled.finditer(line):
				133	current_config = config.group(1)
				134	tile_layout = ''
				135	if is_per_tile: # per-tile bench, add name prefix
				136	current_config = 'tile_' + current_config
				137	layouts = TILE_LAYOUT_RE_COMPILED.search(line)
				138	if layouts and len(layouts.groups()) == 2:
				139	tile_layout = '%sx%s' % layouts.groups()
				140	times = config.group(2)
				141	for new_time in TIME_RE_COMPILED.finditer(times):
				142	current_time_type = new_time.group(1)
				143	iters = [float(i) for i in
				144	new_time.group(2).strip().split(',')]
				145	value_dic.setdefault(bench, {}).setdefault(
				146	current_config, {}).setdefault(current_time_type, []).append(
				147	iters)
				148	layout_dic.setdefault(bench, {}).setdefault(
				149	current_config, {}).setdefault(current_time_type, tile_layout)
				150
				151	def parse_skp_bench_data(directory, revision, rep, default_settings=None):
				152	"""Parses all the skp bench data in the given directory.
				153
				154	Args:
				155	directory: string of path to input data directory.
				156	revision: git hash revision that matches the data to process.
				157	rep: bench representation algorithm, see bench_util.py.
				158	default_settings: dictionary of other run settings. See writer.option() in
				159	bench/benchmain.cpp.
				160
				161	Returns:
				162	A list of BenchDataPoint objects.
				163	"""
				164	revision_data_points = []
				165	file_list = os.listdir(directory)
				166	file_list.sort()
				167	for bench_file in file_list:
				168	scalar_type = None
				169	# Scalar type, if any, is in the bench filename after 'scalar_'.
				170	if (bench_file.startswith('bench_' + revision + '_data_')):
				171	if bench_file.find('scalar_') > 0:
				172	components = bench_file.split('_')
				173	scalar_type = components[components.index('scalar') + 1]
				174	else: # Skips non skp bench files.
				175	continue
				176
				177	with open('/'.join([directory, bench_file]), 'r') as file_handle:
				178	settings = dict(default_settings or {})
				179	settings['scalar'] = scalar_type
				180	revision_data_points.extend(parse(settings, file_handle, rep))
				181
				182	return revision_data_points
				183
				184	# TODO(bensong): switch to reading JSON output when available. This way we don't
				185	# need the RE complexities.
				186	def parse(settings, lines, representation=None):
				187	"""Parses bench output into a useful data structure.
				188
				189	({str:str}, __iter__ -> str) -> [BenchDataPoint]
				190	representation is one of the ALGORITHM_XXX types."""
				191
				192	benches = []
				193	current_bench = None
				194	# [bench][config][time_type] -> [[per-iter values]] where per-tile config
				195	# has per-iter value list for each tile [[<tile1_iter1>,<tile1_iter2>,...],
				196	# [<tile2_iter1>,<tile2_iter2>,...],...], while non-per-tile config only
				197	# contains one list of iterations [[iter1, iter2, ...]].
				198	bench_dic = {}
				199	# [bench][config][time_type] -> tile_layout
				200	layout_dic = {}
				201
				202	for line in lines:
				203
				204	# see if this line is a settings line
				205	settingsMatch = SETTINGS_RE_COMPILED.search(line)
				206	if (settingsMatch):
				207	settings = dict(settings)
				208	for settingMatch in PER_SETTING_RE_COMPILED.finditer(settingsMatch.group(1)):
				209	if (settingMatch.group(2)):
				210	settings[settingMatch.group(1)] = settingMatch.group(2)
				211	else:
				212	settings[settingMatch.group(1)] = True
				213
				214	# see if this line starts a new bench
				215	new_bench = BENCH_RE_COMPILED.search(line)
				216	if new_bench:
				217	current_bench = new_bench.group(1)
				218
				219	# add configs on this line to the bench_dic
				220	if current_bench:
				221	if line.startswith(' tile_') :
				222	_ParseAndStoreTimes(TILE_RE_COMPILED, True, line, current_bench,
				223	bench_dic, layout_dic)
				224	else:
				225	_ParseAndStoreTimes(CONFIG_RE_COMPILED, False, line,
				226	current_bench, bench_dic, layout_dic)
				227
				228	# append benches to list
				229	for bench in bench_dic:
				230	for config in bench_dic[bench]:
				231	for time_type in bench_dic[bench][config]:
				232	tile_layout = ''
				233	per_tile_values = [] # empty for non-per-tile configs
				234	per_iter_time = [] # empty for per-tile configs
				235	bench_summary = None # a single final bench value
				236	if len(bench_dic[bench][config][time_type]) > 1:
				237	# per-tile config; compute representation for each tile
				238	per_tile_values = [
				239	_ListAlgorithm(iters, representation).compute()
				240	for iters in bench_dic[bench][config][time_type]]
				241	# use sum of each tile representation for total bench value
				242	bench_summary = sum(per_tile_values)
				243	# extract tile layout
				244	tile_layout = layout_dic[bench][config][time_type]
				245	else:
				246	# get the list of per-iteration values
				247	per_iter_time = bench_dic[bench][config][time_type][0]
				248	bench_summary = _ListAlgorithm(
				249	per_iter_time, representation).compute()
				250	benches.append(BenchDataPoint(
				251	bench,
				252	config,
				253	time_type,
				254	bench_summary,
				255	settings,
				256	tile_layout,
				257	per_tile_values,
				258	per_iter_time))
				259
				260	return benches
				261
				262	class LinearRegression:
				263	"""Linear regression data based on a set of data points.
				264
				265	([(Number,Number)])
				266	There must be at least two points for this to make sense."""
				267	def __init__(self, points):
				268	n = len(points)
				269	max_x = Min
				270	min_x = Max
				271
				272	Sx = 0.0
				273	Sy = 0.0
				274	Sxx = 0.0
				275	Sxy = 0.0
				276	Syy = 0.0
				277	for point in points:
				278	x = point[0]
				279	y = point[1]
				280	max_x = max(max_x, x)
				281	min_x = min(min_x, x)
				282
				283	Sx += x
				284	Sy += y
				285	Sxx += x*x
				286	Sxy += x*y
				287	Syy += y*y
				288
				289	denom = nSxx - SxSx
				290	if (denom != 0.0):
				291	B = (nSxy - SxSy) / denom
				292	else:
				293	B = 0.0
				294	a = (1.0/n)(Sy - BSx)
				295
				296	se2 = 0
				297	sB2 = 0
				298	sa2 = 0
				299	if (n >= 3 and denom != 0.0):
				300	se2 = (1.0/(n(n-2)) (nSyy - SySy - BBdenom))
				301	sB2 = (n*se2) / denom
				302	sa2 = sB2 * (1.0/n) * Sxx
				303
				304
				305	self.slope = B
				306	self.intercept = a
				307	self.serror = math.sqrt(max(0, se2))
				308	self.serror_slope = math.sqrt(max(0, sB2))
				309	self.serror_intercept = math.sqrt(max(0, sa2))
				310	self.max_x = max_x
				311	self.min_x = min_x
				312
				313	def __repr__(self):
				314	return "LinearRegression(%s, %s, %s, %s, %s)" % (
				315	str(self.slope),
				316	str(self.intercept),
				317	str(self.serror),
				318	str(self.serror_slope),
				319	str(self.serror_intercept),
				320	)
				321
				322	def find_min_slope(self):
				323	"""Finds the minimal slope given one standard deviation."""
				324	slope = self.slope
				325	intercept = self.intercept
				326	error = self.serror
				327	regr_start = self.min_x
				328	regr_end = self.max_x
				329	regr_width = regr_end - regr_start
				330
				331	if slope < 0:
				332	lower_left_y = slope*regr_start + intercept - error
				333	upper_right_y = slope*regr_end + intercept + error
				334	return min(0, (upper_right_y - lower_left_y) / regr_width)
				335
				336	elif slope > 0:
				337	upper_left_y = slope*regr_start + intercept + error
				338	lower_right_y = slope*regr_end + intercept - error
				339	return max(0, (lower_right_y - upper_left_y) / regr_width)
				340
				341	return 0
				342
				343	def CreateRevisionLink(revision_number):
				344	"""Returns HTML displaying the given revision number and linking to
				345	that revision's change page at code.google.com, e.g.
				346	http://code.google.com/p/skia/source/detail?r=2056
				347	"""
				348	return '<a href="http://code.google.com/p/skia/source/detail?r=%s">%s</a>'%(
				349	revision_number, revision_number)
				350
				351	def main():
				352	foo = [[0.0, 0.0], [0.0, 1.0], [0.0, 2.0], [0.0, 3.0]]
				353	LinearRegression(foo)
				354
				355	if __name__ == "__main__":
				356	main()