blob: 0d6b1cbf1d5f1b70de35633b18731ea28d352896 [file] [log] [blame]
bungeman@google.com85669f92011-06-17 13:58:14 +00001'''
2Created on May 19, 2011
3
4@author: bungeman
5'''
6
7import re
8import math
9
bensong@google.comd3fd98f2012-12-18 20:06:10 +000010# bench representation algorithm constant names
11ALGORITHM_AVERAGE = 'avg'
12ALGORITHM_MEDIAN = 'med'
13ALGORITHM_MINIMUM = 'min'
14ALGORITHM_25TH_PERCENTILE = '25th'
15
bungeman@google.com85669f92011-06-17 13:58:14 +000016class BenchDataPoint:
17 """A single data point produced by bench.
bensong@google.comd3fd98f2012-12-18 20:06:10 +000018
bensong@google.comba98f952013-02-13 23:22:29 +000019 (str, str, str, float, {str:str}, str, [floats])"""
20 def __init__(self, bench, config, time_type, time, settings,
21 tile_layout='', per_tile_values=[]):
bungeman@google.com85669f92011-06-17 13:58:14 +000022 self.bench = bench
23 self.config = config
24 self.time_type = time_type
25 self.time = time
26 self.settings = settings
bensong@google.comba98f952013-02-13 23:22:29 +000027 # how tiles cover the whole picture. '5x3' means 5 columns and 3 rows.
28 self.tile_layout = tile_layout
29 # list of per_tile bench values, if applicable
30 self.per_tile_values = per_tile_values
bensong@google.comd3fd98f2012-12-18 20:06:10 +000031
bungeman@google.com85669f92011-06-17 13:58:14 +000032 def __repr__(self):
33 return "BenchDataPoint(%s, %s, %s, %s, %s)" % (
34 str(self.bench),
35 str(self.config),
36 str(self.time_type),
37 str(self.time),
38 str(self.settings),
39 )
bensong@google.comd3fd98f2012-12-18 20:06:10 +000040
bungeman@google.com85669f92011-06-17 13:58:14 +000041class _ExtremeType(object):
42 """Instances of this class compare greater or less than other objects."""
43 def __init__(self, cmpr, rep):
44 object.__init__(self)
45 self._cmpr = cmpr
46 self._rep = rep
bensong@google.comd3fd98f2012-12-18 20:06:10 +000047
bungeman@google.com85669f92011-06-17 13:58:14 +000048 def __cmp__(self, other):
49 if isinstance(other, self.__class__) and other._cmpr == self._cmpr:
50 return 0
51 return self._cmpr
bensong@google.comd3fd98f2012-12-18 20:06:10 +000052
bungeman@google.com85669f92011-06-17 13:58:14 +000053 def __repr__(self):
54 return self._rep
55
56Max = _ExtremeType(1, "Max")
57Min = _ExtremeType(-1, "Min")
58
bensong@google.comb6204b12012-08-16 20:49:28 +000059class _ListAlgorithm(object):
60 """Algorithm for selecting the representation value from a given list.
bensong@google.comd3fd98f2012-12-18 20:06:10 +000061 representation is one of the ALGORITHM_XXX representation types."""
bensong@google.comb6204b12012-08-16 20:49:28 +000062 def __init__(self, data, representation=None):
63 if not representation:
bensong@google.comd3fd98f2012-12-18 20:06:10 +000064 representation = ALGORITHM_AVERAGE # default algorithm
bensong@google.comb6204b12012-08-16 20:49:28 +000065 self._data = data
66 self._len = len(data)
bensong@google.comd3fd98f2012-12-18 20:06:10 +000067 if representation == ALGORITHM_AVERAGE:
bensong@google.comb6204b12012-08-16 20:49:28 +000068 self._rep = sum(self._data) / self._len
69 else:
70 self._data.sort()
bensong@google.comd3fd98f2012-12-18 20:06:10 +000071 if representation == ALGORITHM_MINIMUM:
bensong@google.comb6204b12012-08-16 20:49:28 +000072 self._rep = self._data[0]
73 else:
74 # for percentiles, we use the value below which x% of values are
75 # found, which allows for better detection of quantum behaviors.
bensong@google.comd3fd98f2012-12-18 20:06:10 +000076 if representation == ALGORITHM_MEDIAN:
bensong@google.comb6204b12012-08-16 20:49:28 +000077 x = int(round(0.5 * self._len + 0.5))
bensong@google.comd3fd98f2012-12-18 20:06:10 +000078 elif representation == ALGORITHM_25TH_PERCENTILE:
bensong@google.comb6204b12012-08-16 20:49:28 +000079 x = int(round(0.25 * self._len + 0.5))
80 else:
81 raise Exception("invalid representation algorithm %s!" %
82 representation)
83 self._rep = self._data[x - 1]
84
85 def compute(self):
86 return self._rep
87
bensong@google.comba98f952013-02-13 23:22:29 +000088def _ParseAndStoreTimes(config_re, time_re, line, bench, value_dic, layout_dic,
bensong@google.comd3fd98f2012-12-18 20:06:10 +000089 representation=None):
bensong@google.comba98f952013-02-13 23:22:29 +000090 """Parses given bench time line with regex and adds data to value_dic.
91 For per-tile benches, adds tile layout into layout_dic as well.
bensong@google.comd3fd98f2012-12-18 20:06:10 +000092 config_re: regular expression for parsing the config line.
93 time_re: regular expression for parsing bench time.
94 line: input string line to parse.
95 bench: name of bench for the time values.
bensong@google.comba98f952013-02-13 23:22:29 +000096 value_dic: dictionary to store bench values. See bench_dic in parse() below.
97 layout_dic: dictionary to store tile layouts. See parse() for descriptions.
bensong@google.comd3fd98f2012-12-18 20:06:10 +000098 representation: should match one of the ALGORITHM_XXX types."""
99
bensong@google.comba98f952013-02-13 23:22:29 +0000100 # for extracting tile layout
101 tile_layout_re = ' out of \[(\d+),(\d+)\] <averaged>: '
102
bensong@google.comd3fd98f2012-12-18 20:06:10 +0000103 for config in re.finditer(config_re, line):
104 current_config = config.group(1)
bensong@google.comba98f952013-02-13 23:22:29 +0000105 tile_layout = ''
bensong@google.comd3fd98f2012-12-18 20:06:10 +0000106 if config_re.startswith(' tile_'): # per-tile bench, add name prefix
107 current_config = 'tile_' + current_config
bensong@google.comba98f952013-02-13 23:22:29 +0000108 layouts = re.search(tile_layout_re, line)
109 if layouts and len(layouts.groups()) == 2:
110 tile_layout = '%sx%s' % layouts.groups()
bensong@google.comd3fd98f2012-12-18 20:06:10 +0000111 times = config.group(2)
112 for new_time in re.finditer(time_re, times):
113 current_time_type = new_time.group(1)
114 iters = [float(i) for i in
115 new_time.group(2).strip().split(',')]
bensong@google.comba98f952013-02-13 23:22:29 +0000116 value_dic.setdefault(bench, {}).setdefault(
117 current_config, {}).setdefault(current_time_type, []).append(
118 _ListAlgorithm(iters, representation).compute())
119 layout_dic.setdefault(bench, {}).setdefault(
120 current_config, {}).setdefault(current_time_type, tile_layout)
bensong@google.comd3fd98f2012-12-18 20:06:10 +0000121
122def parse(settings, lines, representation=None):
bungeman@google.com85669f92011-06-17 13:58:14 +0000123 """Parses bench output into a useful data structure.
bensong@google.comd3fd98f2012-12-18 20:06:10 +0000124
bensong@google.com87348162012-08-15 17:31:46 +0000125 ({str:str}, __iter__ -> str) -> [BenchDataPoint]
bensong@google.comd3fd98f2012-12-18 20:06:10 +0000126 representation is one of the ALGORITHM_XXX types."""
127
bungeman@google.com85669f92011-06-17 13:58:14 +0000128 benches = []
129 current_bench = None
bensong@google.comd3fd98f2012-12-18 20:06:10 +0000130 bench_dic = {} # [bench][config][time_type] -> [list of bench values]
bensong@google.comba98f952013-02-13 23:22:29 +0000131 # [bench][config][time_type] -> tile_layout
132 layout_dic = {}
bungeman@google.com85669f92011-06-17 13:58:14 +0000133 setting_re = '([^\s=]+)(?:=(\S+))?'
134 settings_re = 'skia bench:((?:\s+' + setting_re + ')*)'
135 bench_re = 'running bench (?:\[\d+ \d+\] )?\s*(\S+)'
bensong@google.comaf3d79a2012-07-02 20:48:51 +0000136 time_re = '(?:(\w*)msecs = )?\s*((?:\d+\.\d+)(?:,\d+\.\d+)*)'
bensong@google.comff388f82013-02-06 20:59:14 +0000137 # non-per-tile benches have configs that don't end with ']' or '>'
138 config_re = '(\S+[^\]>]): ((?:' + time_re + '\s+)+)'
bensong@google.com7fa58ac2013-01-23 20:51:17 +0000139 # per-tile bench lines are in the following format. Note that there are
140 # non-averaged bench numbers in separate lines, which we ignore now due to
141 # their inaccuracy.
142 tile_re = (' tile_(\S+): tile \[\d+,\d+\] out of \[\d+,\d+\] <averaged>:'
143 ' ((?:' + time_re + '\s+)+)')
bensong@google.comd3fd98f2012-12-18 20:06:10 +0000144
bungeman@google.com85669f92011-06-17 13:58:14 +0000145 for line in lines:
bensong@google.comd3fd98f2012-12-18 20:06:10 +0000146
147 # see if this line is a settings line
bungeman@google.com85669f92011-06-17 13:58:14 +0000148 settingsMatch = re.search(settings_re, line)
149 if (settingsMatch):
150 settings = dict(settings)
151 for settingMatch in re.finditer(setting_re, settingsMatch.group(1)):
152 if (settingMatch.group(2)):
153 settings[settingMatch.group(1)] = settingMatch.group(2)
154 else:
155 settings[settingMatch.group(1)] = True
bensong@google.comd3fd98f2012-12-18 20:06:10 +0000156
157 # see if this line starts a new bench
bungeman@google.com85669f92011-06-17 13:58:14 +0000158 new_bench = re.search(bench_re, line)
159 if new_bench:
160 current_bench = new_bench.group(1)
bensong@google.comd3fd98f2012-12-18 20:06:10 +0000161
162 # add configs on this line to the bench_dic
bungeman@google.com85669f92011-06-17 13:58:14 +0000163 if current_bench:
bensong@google.comd3fd98f2012-12-18 20:06:10 +0000164 for regex in [config_re, tile_re]:
165 _ParseAndStoreTimes(regex, time_re, line, current_bench,
bensong@google.comba98f952013-02-13 23:22:29 +0000166 bench_dic, layout_dic, representation)
bensong@google.comd3fd98f2012-12-18 20:06:10 +0000167
168 # append benches to list, use the total time as final bench value.
169 for bench in bench_dic:
170 for config in bench_dic[bench]:
171 for time_type in bench_dic[bench][config]:
bensong@google.comba98f952013-02-13 23:22:29 +0000172 tile_layout = ''
173 per_tile_values = []
174 if len(bench_dic[bench][config][time_type]) > 1:
175 # per-tile values, extract tile_layout
176 per_tile_values = bench_dic[bench][config][time_type]
177 tile_layout = layout_dic[bench][config][time_type]
bensong@google.comd3fd98f2012-12-18 20:06:10 +0000178 benches.append(BenchDataPoint(
179 bench,
180 config,
181 time_type,
182 sum(bench_dic[bench][config][time_type]),
bensong@google.comba98f952013-02-13 23:22:29 +0000183 settings,
184 tile_layout,
185 per_tile_values))
bensong@google.comd3fd98f2012-12-18 20:06:10 +0000186
bungeman@google.com85669f92011-06-17 13:58:14 +0000187 return benches
bensong@google.comd3fd98f2012-12-18 20:06:10 +0000188
bungeman@google.com85669f92011-06-17 13:58:14 +0000189class LinearRegression:
190 """Linear regression data based on a set of data points.
bensong@google.comd3fd98f2012-12-18 20:06:10 +0000191
bungeman@google.com85669f92011-06-17 13:58:14 +0000192 ([(Number,Number)])
193 There must be at least two points for this to make sense."""
194 def __init__(self, points):
195 n = len(points)
196 max_x = Min
197 min_x = Max
bensong@google.comd3fd98f2012-12-18 20:06:10 +0000198
bungeman@google.com85669f92011-06-17 13:58:14 +0000199 Sx = 0.0
200 Sy = 0.0
201 Sxx = 0.0
202 Sxy = 0.0
203 Syy = 0.0
204 for point in points:
205 x = point[0]
206 y = point[1]
207 max_x = max(max_x, x)
208 min_x = min(min_x, x)
bensong@google.comd3fd98f2012-12-18 20:06:10 +0000209
bungeman@google.com85669f92011-06-17 13:58:14 +0000210 Sx += x
211 Sy += y
212 Sxx += x*x
213 Sxy += x*y
214 Syy += y*y
bensong@google.comd3fd98f2012-12-18 20:06:10 +0000215
senorblanco@chromium.orgc5e1ed82012-09-20 19:05:33 +0000216 denom = n*Sxx - Sx*Sx
217 if (denom != 0.0):
218 B = (n*Sxy - Sx*Sy) / denom
219 else:
220 B = 0.0
bungeman@google.com85669f92011-06-17 13:58:14 +0000221 a = (1.0/n)*(Sy - B*Sx)
bensong@google.comd3fd98f2012-12-18 20:06:10 +0000222
bungeman@google.com85669f92011-06-17 13:58:14 +0000223 se2 = 0
224 sB2 = 0
225 sa2 = 0
senorblanco@chromium.orgc5e1ed82012-09-20 19:05:33 +0000226 if (n >= 3 and denom != 0.0):
227 se2 = (1.0/(n*(n-2)) * (n*Syy - Sy*Sy - B*B*denom))
228 sB2 = (n*se2) / denom
bungeman@google.com85669f92011-06-17 13:58:14 +0000229 sa2 = sB2 * (1.0/n) * Sxx
bensong@google.comd3fd98f2012-12-18 20:06:10 +0000230
231
bungeman@google.com85669f92011-06-17 13:58:14 +0000232 self.slope = B
233 self.intercept = a
234 self.serror = math.sqrt(max(0, se2))
235 self.serror_slope = math.sqrt(max(0, sB2))
236 self.serror_intercept = math.sqrt(max(0, sa2))
237 self.max_x = max_x
238 self.min_x = min_x
bensong@google.comd3fd98f2012-12-18 20:06:10 +0000239
bungeman@google.com85669f92011-06-17 13:58:14 +0000240 def __repr__(self):
241 return "LinearRegression(%s, %s, %s, %s, %s)" % (
242 str(self.slope),
243 str(self.intercept),
244 str(self.serror),
245 str(self.serror_slope),
246 str(self.serror_intercept),
247 )
bensong@google.comd3fd98f2012-12-18 20:06:10 +0000248
bungeman@google.com85669f92011-06-17 13:58:14 +0000249 def find_min_slope(self):
250 """Finds the minimal slope given one standard deviation."""
251 slope = self.slope
252 intercept = self.intercept
253 error = self.serror
254 regr_start = self.min_x
255 regr_end = self.max_x
256 regr_width = regr_end - regr_start
bensong@google.comd3fd98f2012-12-18 20:06:10 +0000257
bungeman@google.com85669f92011-06-17 13:58:14 +0000258 if slope < 0:
259 lower_left_y = slope*regr_start + intercept - error
260 upper_right_y = slope*regr_end + intercept + error
261 return min(0, (upper_right_y - lower_left_y) / regr_width)
bensong@google.comd3fd98f2012-12-18 20:06:10 +0000262
bungeman@google.com85669f92011-06-17 13:58:14 +0000263 elif slope > 0:
264 upper_left_y = slope*regr_start + intercept + error
265 lower_right_y = slope*regr_end + intercept - error
266 return max(0, (lower_right_y - upper_left_y) / regr_width)
bensong@google.comd3fd98f2012-12-18 20:06:10 +0000267
bungeman@google.com85669f92011-06-17 13:58:14 +0000268 return 0
epoger@google.comc71174d2011-08-08 17:19:23 +0000269
270def CreateRevisionLink(revision_number):
271 """Returns HTML displaying the given revision number and linking to
272 that revision's change page at code.google.com, e.g.
273 http://code.google.com/p/skia/source/detail?r=2056
274 """
275 return '<a href="http://code.google.com/p/skia/source/detail?r=%s">%s</a>'%(
276 revision_number, revision_number)
senorblanco@chromium.orgc5e1ed82012-09-20 19:05:33 +0000277
278def main():
279 foo = [[0.0, 0.0], [0.0, 1.0], [0.0, 2.0], [0.0, 3.0]]
280 LinearRegression(foo)
281
282if __name__ == "__main__":
283 main()