# Copyright 2017 The PDFium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Classes that draw conclusions out of a comparison and represent them."""

from collections import Counter

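# ANSI escape sequences used to color each test case's rating in the
# human-readable output of PrintConclusionsDictHumanReadable.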
FORMAT_RED = '\033[01;31m{0}\033[00m'
FORMAT_GREEN = '\033[01;32m{0}\033[00m'
FORMAT_MAGENTA = '\033[01;35m{0}\033[00m'
FORMAT_CYAN = '\033[01;36m{0}\033[00m'
FORMAT_NORMAL = '{0}'

RATING_FAILURE = 'failure'
RATING_REGRESSION = 'regression'
RATING_IMPROVEMENT = 'improvement'
RATING_NO_CHANGE = 'no_change'
RATING_SMALL_CHANGE = 'small_change'

RATINGS = [
    RATING_FAILURE,
    RATING_REGRESSION,
    RATING_IMPROVEMENT,
    RATING_NO_CHANGE,
    RATING_SMALL_CHANGE
]

RATING_TO_COLOR = {
    RATING_FAILURE: FORMAT_MAGENTA,
    RATING_REGRESSION: FORMAT_RED,
    RATING_IMPROVEMENT: FORMAT_CYAN,
    RATING_NO_CHANGE: FORMAT_GREEN,
    RATING_SMALL_CHANGE: FORMAT_NORMAL,
}


class ComparisonConclusions(object):
  """All conclusions drawn from a comparison.

  This is initialized empty and then processes pairs of results for each test
  case, determining the rating for that case, which can be:
  "failure" if either or both runs for the case failed.
  "regression" if there is a significant increase in time for the test case.
  "improvement" if there is a significant decrease in time for the test case.
  "no_change" if the time for the test case did not change at all.
  "small_change" if the time for the test case changed, but the change is
      within the threshold.
  """

  def __init__(self, threshold_significant):
    """Initializes an empty ComparisonConclusions.

    Args:
      threshold_significant: Float with the tolerance beyond which changes in
          measurements are considered significant.

          The change is considered as a multiplication rather than an addition
          of a fraction of the previous measurement, that is, a
          threshold_significant of 1.0 will flag test cases that became over
          100% slower (> 200% of the previous time measured) or over 100%
          faster (< 50% of the previous time measured).

          threshold_significant 0.02 -> 98.04% to 102% is not significant
          threshold_significant 0.1 -> 90.9% to 110% is not significant
          threshold_significant 0.25 -> 80% to 125% is not significant
          threshold_significant 1 -> 50% to 200% is not significant
          threshold_significant 4 -> 20% to 500% is not significant

    """
    self.threshold_significant = threshold_significant
    self.threshold_significant_negative = (1 / (1 + threshold_significant)) - 1
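    # For example, threshold_significant = 0.02 gives a negative threshold of
    # 1 / 1.02 - 1, roughly -0.0196, so slowdowns and speedups are flagged
    # symmetrically in multiplicative terms (see the docstring above).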

    self.params = {'threshold': threshold_significant}
    self.summary = ComparisonSummary()
    self.case_results = {}

  def ProcessCase(self, case_name, before, after):
    """Feeds the results of one test case to the ComparisonConclusions.

    Args:
      case_name: String identifying the case.
      before: Measurement for the "before" version of the code.
      after: Measurement for the "after" version of the code.
    """

    # Switch 0 to None to simplify the json dict output. All zeros are
    # considered failed runs, so they will be represented by "null".
    if not before:
      before = None
    if not after:
      after = None

    if not before or not after:
      ratio = None
      rating = RATING_FAILURE
    else:
      ratio = (float(after) / before) - 1.0
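      # E.g. before=100, after=120 gives ratio 0.2 (20% slower), and
      # before=1000, after=600 gives ratio -0.4 (40% faster).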
      if ratio > self.threshold_significant:
        rating = RATING_REGRESSION
      elif ratio < self.threshold_significant_negative:
        rating = RATING_IMPROVEMENT
      elif ratio == 0:
        rating = RATING_NO_CHANGE
      else:
        rating = RATING_SMALL_CHANGE

    case_result = CaseResult(case_name, before, after, ratio, rating)

    self.summary.ProcessCaseResult(case_result)
    self.case_results[case_name] = case_result

  def GetSummary(self):
    """Gets the ComparisonSummary with consolidated totals."""
    return self.summary

  def GetCaseResults(self):
    """Gets a dict mapping each test case identifier to its CaseResult."""
    return self.case_results

  def GetOutputDict(self):
    """Returns a conclusions dict with all the conclusions drawn.

    Returns:
      A serializable dict with the format illustrated below:
      {
        "version": 1,
        "params": {
          "threshold": 0.02
        },
        "summary": {
          "total": 123,
          "failure": 1,
          "regression": 2,
          "improvement": 1,
          "no_change": 100,
          "small_change": 19
        },
        "comparison_by_case": {
          "testing/resources/new_test.pdf": {
            "before": None,
            "after": 1000,
            "ratio": None,
            "rating": "failure"
          },
          "testing/resources/test1.pdf": {
            "before": 100,
            "after": 120,
            "ratio": 0.2,
            "rating": "regression"
          },
          "testing/resources/test2.pdf": {
            "before": 100,
            "after": 2000,
            "ratio": 19.0,
            "rating": "regression"
          },
          "testing/resources/test3.pdf": {
            "before": 1000,
            "after": 1005,
            "ratio": 0.005,
            "rating": "small_change"
          },
          "testing/resources/test4.pdf": {
            "before": 1000,
            "after": 1000,
            "ratio": 0.0,
            "rating": "no_change"
          },
          "testing/resources/test5.pdf": {
            "before": 1000,
            "after": 600,
            "ratio": -0.4,
            "rating": "improvement"
          }
        }
      }
    """
    output_dict = {}
    output_dict['version'] = 1
    output_dict['params'] = {'threshold': self.threshold_significant}
    output_dict['summary'] = self.summary.GetOutputDict()
    output_dict['comparison_by_case'] = {
        cr.case_name.decode('utf-8'): cr.GetOutputDict()
        for cr in self.GetCaseResults().values()
    }
    return output_dict


class ComparisonSummary(object):
  """Totals computed for a comparison."""

  def __init__(self):
    self.rating_counter = Counter()

  def ProcessCaseResult(self, case_result):
    self.rating_counter[case_result.rating] += 1

  def GetTotal(self):
    """Gets the number of test cases processed."""
    return sum(self.rating_counter.values())

  def GetCount(self, rating):
    """Gets the number of test cases processed with a given rating."""
    return self.rating_counter[rating]

  def GetOutputDict(self):
    """Returns a dict that can be serialized with all the totals."""
    result = {'total': self.GetTotal()}
    for rating in RATINGS:
      result[rating] = self.GetCount(rating)
    return result


class CaseResult(object):
  """The conclusion for the comparison of a single test case."""

  def __init__(self, case_name, before, after, ratio, rating):
    """Initializes a CaseResult.

    Args:
      case_name: String identifying the case.
      before: Measurement for the "before" version of the code.
      after: Measurement for the "after" version of the code.
      ratio: Difference between |after| and |before| as a fraction of |before|.
      rating: Rating for this test case.
    """
    self.case_name = case_name
    self.before = before
    self.after = after
    self.ratio = ratio
    self.rating = rating

  def GetOutputDict(self):
    """Returns a dict with the test case's conclusions."""
    return {'before': self.before,
            'after': self.after,
            'ratio': self.ratio,
            'rating': self.rating}


def PrintConclusionsDictHumanReadable(conclusions_dict, colored, key=None):
  """Prints a conclusions dict in a human-readable way.

  Args:
    conclusions_dict: Dict to print.
    colored: Whether to color the output to highlight significant changes.
    key: String with the CaseResult dictionary key to sort the cases.
  """
  # Print header
  print '=' * 80
  print '{0:>11s} {1:>15s} {2}' .format(
      '% Change',
      'Time after',
      'Test case')
  print '-' * 80

  color = FORMAT_NORMAL

  # Print cases
  if key is not None:
    case_pairs = sorted(conclusions_dict['comparison_by_case'].iteritems(),
                        key=lambda kv: kv[1][key])
  else:
    case_pairs = sorted(conclusions_dict['comparison_by_case'].iteritems())

  for case_name, case_dict in case_pairs:
    if colored:
      color = RATING_TO_COLOR[case_dict['rating']]

    if case_dict['rating'] == RATING_FAILURE:
      print u'{} to measure time for {}'.format(
          color.format('Failed'),
          case_name).encode('utf-8')
      continue

    print u'{0} {1:15,d} {2}' .format(
        color.format('{:+11.4%}'.format(case_dict['ratio'])),
        case_dict['after'],
        case_name).encode('utf-8')

  # Print totals
  totals = conclusions_dict['summary']
  print '=' * 80
  print 'Test cases run: %d' % totals['total']

  if colored:
    color = FORMAT_MAGENTA if totals[RATING_FAILURE] else FORMAT_GREEN
  print ('Failed to measure: %s'
         % color.format(totals[RATING_FAILURE]))

  if colored:
    color = FORMAT_RED if totals[RATING_REGRESSION] else FORMAT_GREEN
  print ('Regressions: %s'
         % color.format(totals[RATING_REGRESSION]))

  if colored:
    color = FORMAT_CYAN if totals[RATING_IMPROVEMENT] else FORMAT_GREEN
  print ('Improvements: %s'
         % color.format(totals[RATING_IMPROVEMENT]))
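

# A minimal usage sketch. The helper below and its timing values are made up
# for illustration only; real callers feed per-case timings measured on the
# "before" and "after" versions of the code.
def _ExampleUsage():
  conclusions = ComparisonConclusions(threshold_significant=0.02)
  # 100 -> 120 is a +20% change, above the threshold, so it is a regression.
  conclusions.ProcessCase('testing/resources/test1.pdf', 100, 120)
  # Identical times are rated "no_change".
  conclusions.ProcessCase('testing/resources/test4.pdf', 1000, 1000)
  # A zero measurement marks a failed run, so this case is rated "failure".
  conclusions.ProcessCase('testing/resources/new_test.pdf', 0, 1000)
  PrintConclusionsDictHumanReadable(
      conclusions.GetOutputDict(), colored=False, key='ratio')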