# Copyright 2017 The PDFium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Classes that draw conclusions out of a comparison and represent them."""

from collections import Counter


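# ANSI escape sequence templates for colorizing terminal output. They are
# applied with str.format, e.g. FORMAT_RED.format('+10.0%') wraps the text in
# bold red.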
FORMAT_RED = '\033[01;31m{0}\033[00m'
FORMAT_GREEN = '\033[01;32m{0}\033[00m'
FORMAT_MAGENTA = '\033[01;35m{0}\033[00m'
FORMAT_CYAN = '\033[01;36m{0}\033[00m'
FORMAT_NORMAL = '{0}'

RATING_FAILURE = 'failure'
RATING_REGRESSION = 'regression'
RATING_IMPROVEMENT = 'improvement'
RATING_NO_CHANGE = 'no_change'
RATING_SMALL_CHANGE = 'small_change'

RATINGS = [
    RATING_FAILURE,
    RATING_REGRESSION,
    RATING_IMPROVEMENT,
    RATING_NO_CHANGE,
    RATING_SMALL_CHANGE
]

RATING_TO_COLOR = {
    RATING_FAILURE: FORMAT_MAGENTA,
    RATING_REGRESSION: FORMAT_RED,
    RATING_IMPROVEMENT: FORMAT_CYAN,
    RATING_NO_CHANGE: FORMAT_GREEN,
    RATING_SMALL_CHANGE: FORMAT_NORMAL,
}


class ComparisonConclusions(object):
  """All conclusions drawn from a comparison.

  This is initialized empty and then processes pairs of results for each test
  case, determining the rating for that case, which can be:
    "failure" if either or both runs for the case failed.
    "regression" if there is a significant increase in time for the test case.
    "improvement" if there is a significant decrease in time for the test case.
    "no_change" if the time for the test case did not change at all.
    "small_change" if the time for the test case changed, but within the
        threshold.
  """

  def __init__(self, threshold_significant):
    """Initializes an empty ComparisonConclusions.

    Args:
      threshold_significant: Float with the tolerance beyond which changes in
          measurements are considered significant.

          The change is considered as a multiplication rather than an addition
          of a fraction of the previous measurement, that is, a
          threshold_significant of 1.0 will flag test cases that became over
          100% slower (> 200% of the previous time measured) or over 100%
          faster (< 50% of the previous time measured).

          threshold_significant 0.02 -> 98.04% to 102% is not significant
          threshold_significant 0.1 -> 90.9% to 110% is not significant
          threshold_significant 0.25 -> 80% to 125% is not significant
          threshold_significant 1 -> 50% to 200% is not significant
          threshold_significant 4 -> 20% to 500% is not significant
    """
    self.threshold_significant = threshold_significant
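    # The negative counterpart below mirrors the threshold multiplicatively
    # rather than additively: e.g. for threshold_significant = 1.0 it is -0.5,
    # so only drops below 50% of the "before" time (matching the > 200% upper
    # bound) count as significant improvements.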
    self.threshold_significant_negative = (1 / (1 + threshold_significant)) - 1

    self.params = {'threshold': threshold_significant}
    self.summary = ComparisonSummary()
    self.case_results = {}

  def ProcessCase(self, case_name, before, after):
    """Feeds a test case's results to the ComparisonConclusions.

    Args:
      case_name: String identifying the case.
      before: Measurement for the "before" version of the code.
      after: Measurement for the "after" version of the code.
    """

    # Switch 0 to None to simplify the json dict output. All zeros are
    # considered failed runs, so they will be represented by "null".
    if not before:
      before = None
    if not after:
      after = None

    if not before or not after:
      ratio = None
      rating = RATING_FAILURE
    else:
      ratio = (float(after) / before) - 1.0
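      # E.g. before=100 and after=120 give ratio 0.2, i.e. a 20% slowdown.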
      if ratio > self.threshold_significant:
        rating = RATING_REGRESSION
      elif ratio < self.threshold_significant_negative:
        rating = RATING_IMPROVEMENT
      elif ratio == 0:
        rating = RATING_NO_CHANGE
      else:
        rating = RATING_SMALL_CHANGE

    case_result = CaseResult(case_name, before, after, ratio, rating)

    self.summary.ProcessCaseResult(case_result)
    self.case_results[case_name] = case_result

  def GetSummary(self):
    """Gets the ComparisonSummary with consolidated totals."""
    return self.summary

  def GetCaseResults(self):
    """Gets a dict mapping each test case identifier to its CaseResult."""
    return self.case_results

  def GetOutputDict(self):
    """Returns a conclusions dict with all the conclusions drawn.

    Returns:
      A serializable dict with the format illustrated below:
      {
        "version": 1,
        "params": {
          "threshold": 0.02
        },
        "summary": {
          "total": 123,
          "failure": 1,
          "regression": 2,
          "improvement": 1,
          "no_change": 100,
          "small_change": 19
        },
        "comparison_by_case": {
          "testing/resources/new_test.pdf": {
            "before": None,
            "after": 1000,
            "ratio": None,
            "rating": "failure"
          },
          "testing/resources/test1.pdf": {
            "before": 100,
            "after": 120,
            "ratio": 0.2,
            "rating": "regression"
          },
          "testing/resources/test2.pdf": {
            "before": 100,
            "after": 2000,
            "ratio": 19.0,
            "rating": "regression"
          },
          "testing/resources/test3.pdf": {
            "before": 1000,
            "after": 1005,
            "ratio": 0.005,
            "rating": "small_change"
          },
          "testing/resources/test4.pdf": {
            "before": 1000,
            "after": 1000,
            "ratio": 0.0,
            "rating": "no_change"
          },
          "testing/resources/test5.pdf": {
            "before": 1000,
            "after": 600,
            "ratio": -0.4,
            "rating": "improvement"
          }
        }
      }
    """
    output_dict = {}
    output_dict['version'] = 1
    output_dict['params'] = {'threshold': self.threshold_significant}
    output_dict['summary'] = self.summary.GetOutputDict()
    output_dict['comparison_by_case'] = {
        cr.case_name.decode('utf-8'): cr.GetOutputDict()
        for cr in self.GetCaseResults().values()
    }
    return output_dict

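# A minimal usage sketch (illustrative only; callers feed per-case timings in
# and read the consolidated conclusions back out):
#
#   conclusions = ComparisonConclusions(threshold_significant=0.02)
#   conclusions.ProcessCase('testing/resources/test1.pdf', 100, 120)
#   conclusions.ProcessCase('testing/resources/new_test.pdf', 0, 1000)
#   summary = conclusions.GetSummary()
#   print summary.GetCount(RATING_REGRESSION)  # 1 (test1 became 20% slower)
#   print summary.GetCount(RATING_FAILURE)     # 1 (new_test has no "before")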

class ComparisonSummary(object):
  """Totals computed for a comparison."""

  def __init__(self):
    self.rating_counter = Counter()

  def ProcessCaseResult(self, case_result):
    self.rating_counter[case_result.rating] += 1

  def GetTotal(self):
    """Gets the number of test cases processed."""
    return sum(self.rating_counter.values())

  def GetCount(self, rating):
    """Gets the number of test cases processed with a given rating."""
    return self.rating_counter[rating]

  def GetOutputDict(self):
    """Returns a dict that can be serialized with all the totals."""
    result = {'total': self.GetTotal()}
    for rating in RATINGS:
      result[rating] = self.GetCount(rating)
    return result

class CaseResult(object):
  """The conclusion for the comparison of a single test case."""

  def __init__(self, case_name, before, after, ratio, rating):
    """Initializes a CaseResult.

    Args:
      case_name: String identifying the case.
      before: Measurement for the "before" version of the code.
      after: Measurement for the "after" version of the code.
      ratio: Difference between |after| and |before| as a fraction of |before|.
      rating: Rating for this test case.
    """
    self.case_name = case_name
    self.before = before
    self.after = after
    self.ratio = ratio
    self.rating = rating

  def GetOutputDict(self):
    """Returns a dict with the test case's conclusions."""
    return {'before': self.before,
            'after': self.after,
            'ratio': self.ratio,
            'rating': self.rating}


def PrintConclusionsDictHumanReadable(conclusions_dict, colored, key=None):
  """Prints a conclusions dict in a human-readable way.

  Args:
    conclusions_dict: Dict to print.
    colored: Whether to color the output to highlight significant changes.
    key: String with the CaseResult dict key ('before', 'after', 'ratio' or
        'rating') by which to sort the cases.
  """
  # Print header
  print '=' * 80
  print '{0:>11s} {1:>15s} {2}'.format(
      '% Change',
      'Time after',
      'Test case')
  print '-' * 80

  color = FORMAT_NORMAL

  # Print cases
  if key is not None:
    case_pairs = sorted(conclusions_dict['comparison_by_case'].iteritems(),
                        key=lambda kv: kv[1][key])
  else:
    case_pairs = sorted(conclusions_dict['comparison_by_case'].iteritems())

  for case_name, case_dict in case_pairs:
    if colored:
      color = RATING_TO_COLOR[case_dict['rating']]

    if case_dict['rating'] == RATING_FAILURE:
      print u'{} to measure time for {}'.format(
          color.format('Failed'),
          case_name).encode('utf-8')
      continue

    print u'{0} {1:15,d} {2}'.format(
        color.format('{:+11.4%}'.format(case_dict['ratio'])),
        case_dict['after'],
        case_name).encode('utf-8')

  # Print totals
  totals = conclusions_dict['summary']
  print '=' * 80
  print 'Test cases run: %d' % totals['total']

  if colored:
    color = FORMAT_MAGENTA if totals[RATING_FAILURE] else FORMAT_GREEN
  print ('Failed to measure: %s'
         % color.format(totals[RATING_FAILURE]))

  if colored:
    color = FORMAT_RED if totals[RATING_REGRESSION] else FORMAT_GREEN
  print ('Regressions: %s'
         % color.format(totals[RATING_REGRESSION]))

  if colored:
    color = FORMAT_CYAN if totals[RATING_IMPROVEMENT] else FORMAT_GREEN
  print ('Improvements: %s'
         % color.format(totals[RATING_IMPROVEMENT]))