| bungeman@google.com | 85669f9 | 2011-06-17 13:58:14 +0000 | [diff] [blame] | 1 | ''' |
| 2 | Created on May 19, 2011 |
| 3 | |
| 4 | @author: bungeman |
| 5 | ''' |
| 6 | |
| 7 | import re |
| 8 | import math |
| 9 | |
| bensong@google.com | d3fd98f | 2012-12-18 20:06:10 +0000 | [diff] [blame] | 10 | # bench representation algorithm constant names |
| 11 | ALGORITHM_AVERAGE = 'avg' |
| 12 | ALGORITHM_MEDIAN = 'med' |
| 13 | ALGORITHM_MINIMUM = 'min' |
| 14 | ALGORITHM_25TH_PERCENTILE = '25th' |
| 15 | |
| bungeman@google.com | 85669f9 | 2011-06-17 13:58:14 +0000 | [diff] [blame] | 16 | class BenchDataPoint: |
| 17 | """A single data point produced by bench. |
| bensong@google.com | d3fd98f | 2012-12-18 20:06:10 +0000 | [diff] [blame] | 18 | |
| bungeman@google.com | 85669f9 | 2011-06-17 13:58:14 +0000 | [diff] [blame] | 19 | (str, str, str, float, {str:str})""" |
| 20 | def __init__(self, bench, config, time_type, time, settings): |
| 21 | self.bench = bench |
| 22 | self.config = config |
| 23 | self.time_type = time_type |
| 24 | self.time = time |
| 25 | self.settings = settings |
| bensong@google.com | d3fd98f | 2012-12-18 20:06:10 +0000 | [diff] [blame] | 26 | |
| bungeman@google.com | 85669f9 | 2011-06-17 13:58:14 +0000 | [diff] [blame] | 27 | def __repr__(self): |
| 28 | return "BenchDataPoint(%s, %s, %s, %s, %s)" % ( |
| 29 | str(self.bench), |
| 30 | str(self.config), |
| 31 | str(self.time_type), |
| 32 | str(self.time), |
| 33 | str(self.settings), |
| 34 | ) |
| bensong@google.com | d3fd98f | 2012-12-18 20:06:10 +0000 | [diff] [blame] | 35 | |
| bungeman@google.com | 85669f9 | 2011-06-17 13:58:14 +0000 | [diff] [blame] | 36 | class _ExtremeType(object): |
| 37 | """Instances of this class compare greater or less than other objects.""" |
| 38 | def __init__(self, cmpr, rep): |
| 39 | object.__init__(self) |
| 40 | self._cmpr = cmpr |
| 41 | self._rep = rep |
| bensong@google.com | d3fd98f | 2012-12-18 20:06:10 +0000 | [diff] [blame] | 42 | |
| bungeman@google.com | 85669f9 | 2011-06-17 13:58:14 +0000 | [diff] [blame] | 43 | def __cmp__(self, other): |
| 44 | if isinstance(other, self.__class__) and other._cmpr == self._cmpr: |
| 45 | return 0 |
| 46 | return self._cmpr |
| bensong@google.com | d3fd98f | 2012-12-18 20:06:10 +0000 | [diff] [blame] | 47 | |
| bungeman@google.com | 85669f9 | 2011-06-17 13:58:14 +0000 | [diff] [blame] | 48 | def __repr__(self): |
| 49 | return self._rep |
| 50 | |
| 51 | Max = _ExtremeType(1, "Max") |
| 52 | Min = _ExtremeType(-1, "Min") |
| 53 | |
| bensong@google.com | b6204b1 | 2012-08-16 20:49:28 +0000 | [diff] [blame] | 54 | class _ListAlgorithm(object): |
| 55 | """Algorithm for selecting the representation value from a given list. |
| bensong@google.com | d3fd98f | 2012-12-18 20:06:10 +0000 | [diff] [blame] | 56 | representation is one of the ALGORITHM_XXX representation types.""" |
| bensong@google.com | b6204b1 | 2012-08-16 20:49:28 +0000 | [diff] [blame] | 57 | def __init__(self, data, representation=None): |
| 58 | if not representation: |
| bensong@google.com | d3fd98f | 2012-12-18 20:06:10 +0000 | [diff] [blame] | 59 | representation = ALGORITHM_AVERAGE # default algorithm |
| bensong@google.com | b6204b1 | 2012-08-16 20:49:28 +0000 | [diff] [blame] | 60 | self._data = data |
| 61 | self._len = len(data) |
| bensong@google.com | d3fd98f | 2012-12-18 20:06:10 +0000 | [diff] [blame] | 62 | if representation == ALGORITHM_AVERAGE: |
| bensong@google.com | b6204b1 | 2012-08-16 20:49:28 +0000 | [diff] [blame] | 63 | self._rep = sum(self._data) / self._len |
| 64 | else: |
| 65 | self._data.sort() |
| bensong@google.com | d3fd98f | 2012-12-18 20:06:10 +0000 | [diff] [blame] | 66 | if representation == ALGORITHM_MINIMUM: |
| bensong@google.com | b6204b1 | 2012-08-16 20:49:28 +0000 | [diff] [blame] | 67 | self._rep = self._data[0] |
| 68 | else: |
| 69 | # for percentiles, we use the value below which x% of values are |
| 70 | # found, which allows for better detection of quantum behaviors. |
| bensong@google.com | d3fd98f | 2012-12-18 20:06:10 +0000 | [diff] [blame] | 71 | if representation == ALGORITHM_MEDIAN: |
| bensong@google.com | b6204b1 | 2012-08-16 20:49:28 +0000 | [diff] [blame] | 72 | x = int(round(0.5 * self._len + 0.5)) |
| bensong@google.com | d3fd98f | 2012-12-18 20:06:10 +0000 | [diff] [blame] | 73 | elif representation == ALGORITHM_25TH_PERCENTILE: |
| bensong@google.com | b6204b1 | 2012-08-16 20:49:28 +0000 | [diff] [blame] | 74 | x = int(round(0.25 * self._len + 0.5)) |
| 75 | else: |
| 76 | raise Exception("invalid representation algorithm %s!" % |
| 77 | representation) |
| 78 | self._rep = self._data[x - 1] |
| 79 | |
| 80 | def compute(self): |
| 81 | return self._rep |
| 82 | |
| bensong@google.com | d3fd98f | 2012-12-18 20:06:10 +0000 | [diff] [blame] | 83 | def _ParseAndStoreTimes(config_re, time_re, line, bench, dic, |
| 84 | representation=None): |
| 85 | """Parses given bench time line with regex and adds data to the given dic. |
| 86 | config_re: regular expression for parsing the config line. |
| 87 | time_re: regular expression for parsing bench time. |
| 88 | line: input string line to parse. |
| 89 | bench: name of bench for the time values. |
| 90 | dic: dictionary to store bench values. See bench_dic in parse() below. |
| 91 | representation: should match one of the ALGORITHM_XXX types.""" |
| 92 | |
| 93 | for config in re.finditer(config_re, line): |
| 94 | current_config = config.group(1) |
| 95 | if config_re.startswith(' tile_'): # per-tile bench, add name prefix |
| 96 | current_config = 'tile_' + current_config |
| 97 | times = config.group(2) |
| 98 | for new_time in re.finditer(time_re, times): |
| 99 | current_time_type = new_time.group(1) |
| 100 | iters = [float(i) for i in |
| 101 | new_time.group(2).strip().split(',')] |
| 102 | dic.setdefault(bench, {}).setdefault(current_config, {}).setdefault( |
| 103 | current_time_type, []).append(_ListAlgorithm( |
| 104 | iters, representation).compute()) |
| 105 | |
| 106 | def parse(settings, lines, representation=None): |
| bungeman@google.com | 85669f9 | 2011-06-17 13:58:14 +0000 | [diff] [blame] | 107 | """Parses bench output into a useful data structure. |
| bensong@google.com | d3fd98f | 2012-12-18 20:06:10 +0000 | [diff] [blame] | 108 | |
| bensong@google.com | 8734816 | 2012-08-15 17:31:46 +0000 | [diff] [blame] | 109 | ({str:str}, __iter__ -> str) -> [BenchDataPoint] |
| bensong@google.com | d3fd98f | 2012-12-18 20:06:10 +0000 | [diff] [blame] | 110 | representation is one of the ALGORITHM_XXX types.""" |
| 111 | |
| bungeman@google.com | 85669f9 | 2011-06-17 13:58:14 +0000 | [diff] [blame] | 112 | benches = [] |
| 113 | current_bench = None |
| bensong@google.com | d3fd98f | 2012-12-18 20:06:10 +0000 | [diff] [blame] | 114 | bench_dic = {} # [bench][config][time_type] -> [list of bench values] |
| bungeman@google.com | 85669f9 | 2011-06-17 13:58:14 +0000 | [diff] [blame] | 115 | setting_re = '([^\s=]+)(?:=(\S+))?' |
| 116 | settings_re = 'skia bench:((?:\s+' + setting_re + ')*)' |
| 117 | bench_re = 'running bench (?:\[\d+ \d+\] )?\s*(\S+)' |
| bensong@google.com | af3d79a | 2012-07-02 20:48:51 +0000 | [diff] [blame] | 118 | time_re = '(?:(\w*)msecs = )?\s*((?:\d+\.\d+)(?:,\d+\.\d+)*)' |
| bensong@google.com | ff388f8 | 2013-02-06 20:59:14 +0000 | [diff] [blame] | 119 | # non-per-tile benches have configs that don't end with ']' or '>' |
| 120 | config_re = '(\S+[^\]>]): ((?:' + time_re + '\s+)+)' |
| bensong@google.com | 7fa58ac | 2013-01-23 20:51:17 +0000 | [diff] [blame] | 121 | # per-tile bench lines are in the following format. Note that there are |
| 122 | # non-averaged bench numbers in separate lines, which we ignore now due to |
| 123 | # their inaccuracy. |
| 124 | tile_re = (' tile_(\S+): tile \[\d+,\d+\] out of \[\d+,\d+\] <averaged>:' |
| 125 | ' ((?:' + time_re + '\s+)+)') |
| bensong@google.com | d3fd98f | 2012-12-18 20:06:10 +0000 | [diff] [blame] | 126 | |
| bungeman@google.com | 85669f9 | 2011-06-17 13:58:14 +0000 | [diff] [blame] | 127 | for line in lines: |
| bensong@google.com | d3fd98f | 2012-12-18 20:06:10 +0000 | [diff] [blame] | 128 | |
| 129 | # see if this line is a settings line |
| bungeman@google.com | 85669f9 | 2011-06-17 13:58:14 +0000 | [diff] [blame] | 130 | settingsMatch = re.search(settings_re, line) |
| 131 | if (settingsMatch): |
| 132 | settings = dict(settings) |
| 133 | for settingMatch in re.finditer(setting_re, settingsMatch.group(1)): |
| 134 | if (settingMatch.group(2)): |
| 135 | settings[settingMatch.group(1)] = settingMatch.group(2) |
| 136 | else: |
| 137 | settings[settingMatch.group(1)] = True |
| bensong@google.com | d3fd98f | 2012-12-18 20:06:10 +0000 | [diff] [blame] | 138 | |
| 139 | # see if this line starts a new bench |
| bungeman@google.com | 85669f9 | 2011-06-17 13:58:14 +0000 | [diff] [blame] | 140 | new_bench = re.search(bench_re, line) |
| 141 | if new_bench: |
| 142 | current_bench = new_bench.group(1) |
| bensong@google.com | d3fd98f | 2012-12-18 20:06:10 +0000 | [diff] [blame] | 143 | |
| 144 | # add configs on this line to the bench_dic |
| bungeman@google.com | 85669f9 | 2011-06-17 13:58:14 +0000 | [diff] [blame] | 145 | if current_bench: |
| bensong@google.com | d3fd98f | 2012-12-18 20:06:10 +0000 | [diff] [blame] | 146 | for regex in [config_re, tile_re]: |
| 147 | _ParseAndStoreTimes(regex, time_re, line, current_bench, |
| 148 | bench_dic, representation) |
| 149 | |
| 150 | # append benches to list, use the total time as final bench value. |
| 151 | for bench in bench_dic: |
| 152 | for config in bench_dic[bench]: |
| 153 | for time_type in bench_dic[bench][config]: |
| 154 | benches.append(BenchDataPoint( |
| 155 | bench, |
| 156 | config, |
| 157 | time_type, |
| 158 | sum(bench_dic[bench][config][time_type]), |
| 159 | settings)) |
| 160 | |
| bungeman@google.com | 85669f9 | 2011-06-17 13:58:14 +0000 | [diff] [blame] | 161 | return benches |
| bensong@google.com | d3fd98f | 2012-12-18 20:06:10 +0000 | [diff] [blame] | 162 | |
| bungeman@google.com | 85669f9 | 2011-06-17 13:58:14 +0000 | [diff] [blame] | 163 | class LinearRegression: |
| 164 | """Linear regression data based on a set of data points. |
| bensong@google.com | d3fd98f | 2012-12-18 20:06:10 +0000 | [diff] [blame] | 165 | |
| bungeman@google.com | 85669f9 | 2011-06-17 13:58:14 +0000 | [diff] [blame] | 166 | ([(Number,Number)]) |
| 167 | There must be at least two points for this to make sense.""" |
| 168 | def __init__(self, points): |
| 169 | n = len(points) |
| 170 | max_x = Min |
| 171 | min_x = Max |
| bensong@google.com | d3fd98f | 2012-12-18 20:06:10 +0000 | [diff] [blame] | 172 | |
| bungeman@google.com | 85669f9 | 2011-06-17 13:58:14 +0000 | [diff] [blame] | 173 | Sx = 0.0 |
| 174 | Sy = 0.0 |
| 175 | Sxx = 0.0 |
| 176 | Sxy = 0.0 |
| 177 | Syy = 0.0 |
| 178 | for point in points: |
| 179 | x = point[0] |
| 180 | y = point[1] |
| 181 | max_x = max(max_x, x) |
| 182 | min_x = min(min_x, x) |
| bensong@google.com | d3fd98f | 2012-12-18 20:06:10 +0000 | [diff] [blame] | 183 | |
| bungeman@google.com | 85669f9 | 2011-06-17 13:58:14 +0000 | [diff] [blame] | 184 | Sx += x |
| 185 | Sy += y |
| 186 | Sxx += x*x |
| 187 | Sxy += x*y |
| 188 | Syy += y*y |
| bensong@google.com | d3fd98f | 2012-12-18 20:06:10 +0000 | [diff] [blame] | 189 | |
| senorblanco@chromium.org | c5e1ed8 | 2012-09-20 19:05:33 +0000 | [diff] [blame] | 190 | denom = n*Sxx - Sx*Sx |
| 191 | if (denom != 0.0): |
| 192 | B = (n*Sxy - Sx*Sy) / denom |
| 193 | else: |
| 194 | B = 0.0 |
| bungeman@google.com | 85669f9 | 2011-06-17 13:58:14 +0000 | [diff] [blame] | 195 | a = (1.0/n)*(Sy - B*Sx) |
| bensong@google.com | d3fd98f | 2012-12-18 20:06:10 +0000 | [diff] [blame] | 196 | |
| bungeman@google.com | 85669f9 | 2011-06-17 13:58:14 +0000 | [diff] [blame] | 197 | se2 = 0 |
| 198 | sB2 = 0 |
| 199 | sa2 = 0 |
| senorblanco@chromium.org | c5e1ed8 | 2012-09-20 19:05:33 +0000 | [diff] [blame] | 200 | if (n >= 3 and denom != 0.0): |
| 201 | se2 = (1.0/(n*(n-2)) * (n*Syy - Sy*Sy - B*B*denom)) |
| 202 | sB2 = (n*se2) / denom |
| bungeman@google.com | 85669f9 | 2011-06-17 13:58:14 +0000 | [diff] [blame] | 203 | sa2 = sB2 * (1.0/n) * Sxx |
| bensong@google.com | d3fd98f | 2012-12-18 20:06:10 +0000 | [diff] [blame] | 204 | |
| 205 | |
| bungeman@google.com | 85669f9 | 2011-06-17 13:58:14 +0000 | [diff] [blame] | 206 | self.slope = B |
| 207 | self.intercept = a |
| 208 | self.serror = math.sqrt(max(0, se2)) |
| 209 | self.serror_slope = math.sqrt(max(0, sB2)) |
| 210 | self.serror_intercept = math.sqrt(max(0, sa2)) |
| 211 | self.max_x = max_x |
| 212 | self.min_x = min_x |
| bensong@google.com | d3fd98f | 2012-12-18 20:06:10 +0000 | [diff] [blame] | 213 | |
| bungeman@google.com | 85669f9 | 2011-06-17 13:58:14 +0000 | [diff] [blame] | 214 | def __repr__(self): |
| 215 | return "LinearRegression(%s, %s, %s, %s, %s)" % ( |
| 216 | str(self.slope), |
| 217 | str(self.intercept), |
| 218 | str(self.serror), |
| 219 | str(self.serror_slope), |
| 220 | str(self.serror_intercept), |
| 221 | ) |
| bensong@google.com | d3fd98f | 2012-12-18 20:06:10 +0000 | [diff] [blame] | 222 | |
| bungeman@google.com | 85669f9 | 2011-06-17 13:58:14 +0000 | [diff] [blame] | 223 | def find_min_slope(self): |
| 224 | """Finds the minimal slope given one standard deviation.""" |
| 225 | slope = self.slope |
| 226 | intercept = self.intercept |
| 227 | error = self.serror |
| 228 | regr_start = self.min_x |
| 229 | regr_end = self.max_x |
| 230 | regr_width = regr_end - regr_start |
| bensong@google.com | d3fd98f | 2012-12-18 20:06:10 +0000 | [diff] [blame] | 231 | |
| bungeman@google.com | 85669f9 | 2011-06-17 13:58:14 +0000 | [diff] [blame] | 232 | if slope < 0: |
| 233 | lower_left_y = slope*regr_start + intercept - error |
| 234 | upper_right_y = slope*regr_end + intercept + error |
| 235 | return min(0, (upper_right_y - lower_left_y) / regr_width) |
| bensong@google.com | d3fd98f | 2012-12-18 20:06:10 +0000 | [diff] [blame] | 236 | |
| bungeman@google.com | 85669f9 | 2011-06-17 13:58:14 +0000 | [diff] [blame] | 237 | elif slope > 0: |
| 238 | upper_left_y = slope*regr_start + intercept + error |
| 239 | lower_right_y = slope*regr_end + intercept - error |
| 240 | return max(0, (lower_right_y - upper_left_y) / regr_width) |
| bensong@google.com | d3fd98f | 2012-12-18 20:06:10 +0000 | [diff] [blame] | 241 | |
| bungeman@google.com | 85669f9 | 2011-06-17 13:58:14 +0000 | [diff] [blame] | 242 | return 0 |
| epoger@google.com | c71174d | 2011-08-08 17:19:23 +0000 | [diff] [blame] | 243 | |
| 244 | def CreateRevisionLink(revision_number): |
| 245 | """Returns HTML displaying the given revision number and linking to |
| 246 | that revision's change page at code.google.com, e.g. |
| 247 | http://code.google.com/p/skia/source/detail?r=2056 |
| 248 | """ |
| 249 | return '<a href="http://code.google.com/p/skia/source/detail?r=%s">%s</a>'%( |
| 250 | revision_number, revision_number) |
| senorblanco@chromium.org | c5e1ed8 | 2012-09-20 19:05:33 +0000 | [diff] [blame] | 251 | |
| 252 | def main(): |
| 253 | foo = [[0.0, 0.0], [0.0, 1.0], [0.0, 2.0], [0.0, 3.0]] |
| 254 | LinearRegression(foo) |
| 255 | |
| 256 | if __name__ == "__main__": |
| 257 | main() |