bungeman@google.com | 85669f9 | 2011-06-17 13:58:14 +0000 | [diff] [blame] | 1 | ''' |
| 2 | Created on May 19, 2011 |
| 3 | |
| 4 | @author: bungeman |
| 5 | ''' |
| 6 | |
commit-bot@chromium.org | b1bcb21 | 2014-03-17 21:16:29 +0000 | [diff] [blame] | 7 | import os |
bungeman@google.com | 85669f9 | 2011-06-17 13:58:14 +0000 | [diff] [blame] | 8 | import re |
| 9 | import math |
| 10 | |
# Names of the bench representation algorithms, i.e. the ways a list of timings
# can be reduced to a single representative value.
ALGORITHM_AVERAGE = 'avg'
ALGORITHM_MEDIAN = 'med'
ALGORITHM_MINIMUM = 'min'
ALGORITHM_25TH_PERCENTILE = '25th'

# Regular expressions used throughout.
# All patterns are raw strings so that backslash sequences such as \s or \[
# reach the re module untouched instead of being treated as (invalid) string
# escapes by the Python compiler.

# One 'name' or 'name=value' token of a settings line.
PER_SETTING_RE = r'([^\s=]+)(?:=(\S+))?'
# A whole settings line; group 1 holds all of the settings tokens.
SETTINGS_RE = r'skia bench:((?:\s+' + PER_SETTING_RE + r')*)'
# The line announcing a new bench; group 1 is the bench name.
BENCH_RE = r'running bench (?:\[\d+ \d+\] )?\s*(\S+)'
# One timing entry: optional '<type>msecs = ' prefix (group 1 is the type),
# followed by a comma separated list of decimal values (group 2).
TIME_RE = r'(?:(\w*)msecs = )?\s*((?:\d+\.\d+)(?:,\s*\d+\.\d+)*)'
# non-per-tile benches have configs that don't end with ']' or '>'
CONFIG_RE = r'(\S+[^\]>]):\s+((?:' + TIME_RE + r'\s+)+)'
# per-tile bench lines are in the following format. Note that there are
# non-averaged bench numbers in separate lines, which we ignore now due to
# their inaccuracy.
TILE_RE = (r' tile_(\S+): tile \[\d+,\d+\] out of \[\d+,\d+\] <averaged>:'
           r' ((?:' + TIME_RE + r'\s+)+)')
# for extracting tile layout
TILE_LAYOUT_RE = r' out of \[(\d+),(\d+)\] <averaged>: '

# Precompiled forms of the patterns above.
PER_SETTING_RE_COMPILED = re.compile(PER_SETTING_RE)
SETTINGS_RE_COMPILED = re.compile(SETTINGS_RE)
BENCH_RE_COMPILED = re.compile(BENCH_RE)
TIME_RE_COMPILED = re.compile(TIME_RE)
CONFIG_RE_COMPILED = re.compile(CONFIG_RE)
TILE_RE_COMPILED = re.compile(TILE_RE)
TILE_LAYOUT_RE_COMPILED = re.compile(TILE_LAYOUT_RE)
| 39 | |
class BenchDataPoint:
    """A single data point produced by bench.

    The constructor only stores its arguments; see the per-attribute comments
    in __init__ for the meaning of each field.
    """
    def __init__(self, bench, config, time_type, time, settings,
                 tile_layout='', per_tile_values=None, per_iter_time=None):
        # string name of the benchmark to measure
        self.bench = bench
        # string name of the configurations to run
        self.config = config
        # type of the timer in string: '' (walltime), 'c' (cpu) or 'g' (gpu)
        self.time_type = time_type
        # float number of the bench time value
        self.time = time
        # dictionary of the run settings
        self.settings = settings
        # how tiles cover the whole picture: '5x3' means 5 columns and 3 rows
        self.tile_layout = tile_layout
        # list of float for per_tile bench values, if applicable.
        # A fresh list is created when the argument is omitted so that
        # instances never share (and accidentally mutate) one default list.
        if per_tile_values is None:
            per_tile_values = []
        self.per_tile_values = per_tile_values
        # list of float for per-iteration bench time, if applicable
        if per_iter_time is None:
            per_iter_time = []
        self.per_iter_time = per_iter_time

    def __repr__(self):
        """Returns a short description; the tile and iteration lists are
        deliberately left out."""
        return "BenchDataPoint(%s, %s, %s, %s, %s)" % (
                   str(self.bench),
                   str(self.config),
                   str(self.time_type),
                   str(self.time),
                   str(self.settings),
               )
bensong@google.com | d3fd98f | 2012-12-18 20:06:10 +0000 | [diff] [blame] | 70 | |
class _ExtremeType(object):
    """Instances of this class compare greater or less than other objects.

    cmpr: 1 for an instance that sorts above everything else, -1 for one that
          sorts below everything else.
    rep: string returned by repr().

    Two instances with the same cmpr compare equal to each other. Both the
    legacy __cmp__ hook and the rich comparison methods are provided:
    Python 3 never calls __cmp__, so without the rich comparisons expressions
    such as max(Min, 1.0) would raise TypeError there.
    """
    def __init__(self, cmpr, rep):
        object.__init__(self)
        self._cmpr = cmpr
        self._rep = rep

    def _compare(self, other):
        """Returns 0 for an equal extreme, otherwise this instance's sign."""
        if isinstance(other, self.__class__) and other._cmpr == self._cmpr:
            return 0
        return self._cmpr

    def __cmp__(self, other):
        return self._compare(other)

    def __eq__(self, other):
        return self._compare(other) == 0

    def __ne__(self, other):
        return self._compare(other) != 0

    def __lt__(self, other):
        return self._compare(other) < 0

    def __le__(self, other):
        return self._compare(other) <= 0

    def __gt__(self, other):
        return self._compare(other) > 0

    def __ge__(self, other):
        return self._compare(other) >= 0

    def __hash__(self):
        # Defined explicitly because declaring __eq__ would otherwise make
        # instances unhashable on Python 3; equal extremes hash equally.
        return hash((self.__class__.__name__, self._cmpr))

    def __repr__(self):
        return self._rep

# Sentinels ordered above / below any other value.
Max = _ExtremeType(1, "Max")
Min = _ExtremeType(-1, "Min")
| 88 | |
class _ListAlgorithm(object):
    """Algorithm for selecting the representation value from a given list.

    data: non-empty list of numbers. It is never modified; the order
          statistics work on a sorted copy so callers keep their original
          (e.g. per-iteration) ordering.
    representation: one of the ALGORITHM_XXX representation types. A false
                    value (None, '') selects ALGORITHM_AVERAGE.

    Raises Exception for an unknown representation name.
    """
    def __init__(self, data, representation=None):
        if not representation:
            representation = ALGORITHM_AVERAGE  # default algorithm
        self._data = data
        self._len = len(data)
        if representation == ALGORITHM_AVERAGE:
            self._rep = sum(self._data) / self._len
            return

        if representation == ALGORITHM_MINIMUM:
            index = 0
        elif representation == ALGORITHM_MEDIAN:
            # for percentiles, we use the value below which x% of values are
            # found, which allows for better detection of quantum behaviors.
            # This is the 1-based rank round_half_up(0.5 * len + 0.5), i.e.
            # the 0-based index len // 2. Integer arithmetic is used because
            # round() resolves .5 ties differently on Python 2 and Python 3.
            index = self._len // 2
        elif representation == ALGORITHM_25TH_PERCENTILE:
            # 1-based rank round_half_up(0.25 * len + 0.5) -> index len // 4.
            index = self._len // 4
        else:
            raise Exception("invalid representation algorithm %s!" %
                            representation)
        # Sort a copy so the caller's list keeps its order.
        self._data = sorted(data)
        self._rep = self._data[index]

    def compute(self):
        """Returns the representation value selected at construction time."""
        return self._rep
| 117 | |
def _ParseAndStoreTimes(config_re_compiled, is_per_tile, line, bench,
                        value_dic, layout_dic):
    """Extracts the timings found on one bench output line.

    Every config matched on the line contributes one list of per-iteration
    floats per timer type; that list is appended to
    value_dic[bench][config][time_type]. The tile layout ('' for non-per-tile
    benches) is recorded in layout_dic[bench][config][time_type] the first time
    that key is seen.

    config_re_compiled: precompiled regular expression for parsing the config
                        line; group 1 is the config name, group 2 the timings.
    is_per_tile: boolean indicating whether this is a per-tile bench.
                 If so, the config name gets a 'tile_' prefix and the tile
                 layout is read from the line.
    line: input string line to parse.
    bench: name of bench for the time values.
    value_dic: dictionary to store bench values; updated in place.
    layout_dic: dictionary to store tile layouts; updated in place.
    """
    for config_match in config_re_compiled.finditer(line):
        config_name = config_match.group(1)
        layout = ''
        if is_per_tile:
            # per-tile bench, add name prefix
            config_name = 'tile_' + config_name
            layout_match = TILE_LAYOUT_RE_COMPILED.search(line)
            if layout_match and len(layout_match.groups()) == 2:
                layout = '%sx%s' % layout_match.groups()

        timings_text = config_match.group(2)
        for time_match in TIME_RE_COMPILED.finditer(timings_text):
            time_type = time_match.group(1)
            raw_values = time_match.group(2).strip().split(',')
            iterations = [float(raw) for raw in raw_values]

            values_by_type = value_dic.setdefault(bench, {}).setdefault(
                config_name, {})
            values_by_type.setdefault(time_type, []).append(iterations)

            layouts_by_type = layout_dic.setdefault(bench, {}).setdefault(
                config_name, {})
            # setdefault keeps the layout stored by an earlier line, if any.
            layouts_by_type.setdefault(time_type, layout)
bensong@google.com | d3fd98f | 2012-12-18 20:06:10 +0000 | [diff] [blame] | 150 | |
def parse_skp_bench_data(directory, revision, rep, default_settings=None):
    """Parses all the skp bench data in the given directory.

    Only files whose name starts with 'bench_<revision>_data_' are read, in
    sorted file name order.

    Args:
      directory: string of path to input data directory.
      revision: git hash revision that matches the data to process.
      rep: bench representation algorithm, passed through to parse().
      default_settings: dictionary of other run settings. See writer.option() in
          bench/benchmain.cpp. It is copied for each file, never modified.

    Returns:
      A list of BenchDataPoint objects.
    """
    data_file_prefix = 'bench_' + revision + '_data_'
    revision_data_points = []
    for bench_file in sorted(os.listdir(directory)):
        if not bench_file.startswith(data_file_prefix):
            continue  # Skips non skp bench files.

        # Scalar type, if any, is in the bench filename after 'scalar_'.
        scalar_type = None
        if bench_file.find('scalar_') > 0:
            components = bench_file.split('_')
            # 'scalar_' may also occur inside a longer token (for example
            # 'nonscalar_'); only a standalone 'scalar' component that is
            # followed by another component names a scalar type.
            if 'scalar' in components:
                type_index = components.index('scalar') + 1
                if type_index < len(components):
                    scalar_type = components[type_index]

        with open(os.path.join(directory, bench_file), 'r') as file_handle:
            settings = dict(default_settings or {})
            settings['scalar'] = scalar_type
            revision_data_points.extend(parse(settings, file_handle, rep))

    return revision_data_points
| 183 | |
bensong@google.com | 967b258 | 2013-12-05 01:31:56 +0000 | [diff] [blame] | 184 | # TODO(bensong): switch to reading JSON output when available. This way we don't |
| 185 | # need the RE complexities. |
def parse(settings, lines, representation=None):
    """Parses bench output into a useful data structure.

    ({str:str}, __iter__ -> str) -> [BenchDataPoint]

    settings: dictionary of run settings. It is copied before settings found in
              the output are merged in, so the caller's dictionary is untouched.
    lines: iterable of bench output lines.
    representation: one of the ALGORITHM_XXX types.

    Every returned data point shares the settings dictionary as it stands after
    the last line has been read.
    """
    # [bench][config][time_type] -> [[per-iter values]] where per-tile config
    # has per-iter value list for each tile [[<tile1_iter1>,<tile1_iter2>,...],
    # [<tile2_iter1>,<tile2_iter2>,...],...], while non-per-tile config only
    # contains one list of iterations [[iter1, iter2, ...]].
    bench_dic = {}
    # [bench][config][time_type] -> tile_layout
    layout_dic = {}
    current_bench = None

    for line in lines:
        # A settings line updates a private copy of the settings.
        settings_match = SETTINGS_RE_COMPILED.search(line)
        if settings_match:
            settings = dict(settings)
            tokens = PER_SETTING_RE_COMPILED.finditer(settings_match.group(1))
            for token in tokens:
                name = token.group(1)
                value = token.group(2)
                # A setting without a (non-empty) value acts as a flag.
                settings[name] = value if value else True

        # A 'running bench' line switches the bench that timings belong to.
        bench_match = BENCH_RE_COMPILED.search(line)
        if bench_match:
            current_bench = bench_match.group(1)

        # Timings are only collected once a bench name is known.
        if not current_bench:
            continue
        if line.startswith(' tile_'):
            _ParseAndStoreTimes(TILE_RE_COMPILED, True, line, current_bench,
                                bench_dic, layout_dic)
        else:
            _ParseAndStoreTimes(CONFIG_RE_COMPILED, False, line,
                                current_bench, bench_dic, layout_dic)

    # Turn the collected timings into data points.
    benches = []
    for bench, configs in bench_dic.items():
        for config, time_types in configs.items():
            for time_type, iteration_lists in time_types.items():
                if len(iteration_lists) > 1:
                    # per-tile config; compute representation for each tile
                    per_tile_values = [
                        _ListAlgorithm(tile_iters, representation).compute()
                        for tile_iters in iteration_lists]
                    per_iter_time = []  # empty for per-tile configs
                    # use sum of each tile representation for total bench value
                    bench_summary = sum(per_tile_values)
                    tile_layout = layout_dic[bench][config][time_type]
                else:
                    # a single list of per-iteration values
                    per_tile_values = []  # empty for non-per-tile configs
                    per_iter_time = iteration_lists[0]
                    bench_summary = _ListAlgorithm(
                        per_iter_time, representation).compute()
                    tile_layout = ''
                data_point = BenchDataPoint(
                    bench,
                    config,
                    time_type,
                    bench_summary,
                    settings,
                    tile_layout,
                    per_tile_values,
                    per_iter_time)
                benches.append(data_point)

    return benches
bensong@google.com | d3fd98f | 2012-12-18 20:06:10 +0000 | [diff] [blame] | 261 | |
class LinearRegression:
    """Linear regression data based on a set of data points.

    ([(Number,Number)])
    There must be at least two points for this to make sense.

    Attributes set by the constructor:
      slope, intercept: coefficients of the fitted line y = slope*x + intercept.
          The slope is 0.0 when it cannot be determined (all x values equal).
      serror, serror_slope, serror_intercept: error estimates derived from the
          residuals; all 0 unless there are at least three points and the
          slope could be determined.
      min_x, max_x: smallest and largest x value among the points.

    An empty points sequence raises ZeroDivisionError.
    """
    def __init__(self, points):
        n = len(points)
        # Float infinities order correctly against any real number on both
        # Python 2 and Python 3, so the running extremes need no custom
        # sentinel objects.
        max_x = float('-inf')
        min_x = float('inf')

        # Running sums of x, y, x*x, x*y and y*y.
        Sx = 0.0
        Sy = 0.0
        Sxx = 0.0
        Sxy = 0.0
        Syy = 0.0
        for point in points:
            x = point[0]
            y = point[1]
            max_x = max(max_x, x)
            min_x = min(min_x, x)

            Sx += x
            Sy += y
            Sxx += x*x
            Sxy += x*y
            Syy += y*y

        # denom is zero when every x is the same; fall back to a flat line.
        denom = n*Sxx - Sx*Sx
        if (denom != 0.0):
            B = (n*Sxy - Sx*Sy) / denom
        else:
            B = 0.0
        a = (1.0/n)*(Sy - B*Sx)

        se2 = 0
        sB2 = 0
        sa2 = 0
        # The error terms divide by (n - 2) and by denom.
        if (n >= 3 and denom != 0.0):
            se2 = (1.0/(n*(n-2)) * (n*Syy - Sy*Sy - B*B*denom))
            sB2 = (n*se2) / denom
            sa2 = sB2 * (1.0/n) * Sxx

        self.slope = B
        self.intercept = a
        # max(0, ...) guards sqrt against tiny negative rounding errors.
        self.serror = math.sqrt(max(0, se2))
        self.serror_slope = math.sqrt(max(0, sB2))
        self.serror_intercept = math.sqrt(max(0, sa2))
        self.max_x = max_x
        self.min_x = min_x

    def __repr__(self):
        return "LinearRegression(%s, %s, %s, %s, %s)" % (
                   str(self.slope),
                   str(self.intercept),
                   str(self.serror),
                   str(self.serror_slope),
                   str(self.serror_intercept),
               )

    def find_min_slope(self):
        """Finds the minimal slope given one standard deviation.

        The fitted line is shifted by serror in opposite directions at min_x
        and max_x so that it becomes as flat as possible; the result is
        clamped so it never changes sign relative to the fitted slope.
        Returns 0 for a flat fit or when min_x equals max_x.
        """
        slope = self.slope
        intercept = self.intercept
        error = self.serror
        regr_start = self.min_x
        regr_end = self.max_x
        regr_width = regr_end - regr_start

        # All points share one x value: there is no run to divide by, even if
        # rounding left a tiny non-zero slope behind.
        if regr_width == 0:
            return 0

        if slope < 0:
            lower_left_y = slope*regr_start + intercept - error
            upper_right_y = slope*regr_end + intercept + error
            return min(0, (upper_right_y - lower_left_y) / regr_width)

        elif slope > 0:
            upper_left_y = slope*regr_start + intercept + error
            lower_right_y = slope*regr_end + intercept - error
            return max(0, (lower_right_y - upper_left_y) / regr_width)

        return 0
epoger@google.com | c71174d | 2011-08-08 17:19:23 +0000 | [diff] [blame] | 342 | |
def CreateRevisionLink(revision_number):
    """Builds an HTML anchor for a revision.

    The anchor text is the given revision number and the target is that
    revision's change page at code.google.com, e.g.
    http://code.google.com/p/skia/source/detail?r=2056
    """
    link_template = (
        '<a href="http://code.google.com/p/skia/source/detail?r=%s">%s</a>')
    # The revision appears twice: once in the URL and once as the link text.
    return link_template % ((revision_number,) * 2)
senorblanco@chromium.org | c5e1ed8 | 2012-09-20 19:05:33 +0000 | [diff] [blame] | 350 | |
def main():
    """Builds a LinearRegression from four points that all have x == 0.0."""
    vertical_points = [[0.0, float(y)] for y in range(4)]
    LinearRegression(vertical_points)


if __name__ == "__main__":
    main()