Remove obsolete bench analysis scripts
R=bungeman@google.com
BUG=skia:5459
GOLD_TRYBOT_URL= https://gold.skia.org/search?issue=2125953002
Review-Url: https://codereview.chromium.org/2125953002
diff --git a/bench/bench_compare.py b/bench/bench_compare.py
deleted file mode 100755
index f4f7734..0000000
--- a/bench/bench_compare.py
+++ /dev/null
@@ -1,193 +0,0 @@
-#!/usr/bin/env python
-'''
-Created on May 16, 2011
-
-@author: bungeman
-'''
-import sys
-import getopt
-import bench_util
-
-def usage():
- """Prints simple usage information."""
-
- print '-o <file> the old bench output file.'
- print '-n <file> the new bench output file.'
- print '-h causes headers to be output.'
- print '-s <stat> the type of statistical analysis used'
- print ' Not specifying is the same as -s "avg".'
- print ' avg: average of all data points'
- print ' min: minimum of all data points'
- print ' med: median of all data points'
- print ' 25th: twenty-fifth percentile for all data points'
- print '-f <fieldSpec> which fields to output and in what order.'
- print ' Not specifying is the same as -f "bctondp".'
- print ' b: bench'
- print ' c: config'
- print ' t: time type'
- print ' o: old time'
- print ' n: new time'
- print ' d: diff'
- print ' p: percent diff'
- print '-t use tab delimited format for output.'
- print '--match <bench> only matches benches which begin with <bench>.'
-
-class BenchDiff:
- """A compare between data points produced by bench.
-
- (BenchDataPoint, BenchDataPoint)"""
- def __init__(self, old, new):
- self.old = old
- self.new = new
- self.diff = old.time - new.time
- diffp = 0
- if old.time != 0:
- diffp = self.diff / old.time
- self.diffp = diffp
-
- def __repr__(self):
- return "BenchDiff(%s, %s)" % (
- str(self.new),
- str(self.old),
- )
-
-def main():
- """Parses command line and writes output."""
-
- try:
- opts, _ = getopt.getopt(sys.argv[1:], "f:o:n:s:ht", ['match='])
- except getopt.GetoptError, err:
- print str(err)
- usage()
- sys.exit(2)
-
- old = None
- new = None
- column_format = ""
- header_format = ""
- columns = 'bctondp'
- header = False
- stat_type = "avg"
- use_tabs = False
- match_bench = None
-
- for option, value in opts:
- if option == "-o":
- old = value
- elif option == "-n":
- new = value
- elif option == "-h":
- header = True
- elif option == "-f":
- columns = value
- elif option == "-s":
- stat_type = value
- elif option == "-t":
- use_tabs = True
- elif option == "--match":
- match_bench = value
- else:
- usage()
- assert False, "unhandled option"
-
- if old is None or new is None:
- usage()
- sys.exit(2)
-
- old_benches = bench_util.parse({}, open(old, 'r'), stat_type)
- new_benches = bench_util.parse({}, open(new, 'r'), stat_type)
-
- bench_diffs = []
- for old_bench in old_benches:
- # Filter benches by the match criteria.
- if match_bench and not old_bench.bench.startswith(match_bench):
- continue
-
- # Filter new_benches for benches that match old_bench.
- new_bench_match = [bench for bench in new_benches
- if old_bench.bench == bench.bench and
- old_bench.config == bench.config and
- old_bench.time_type == bench.time_type
- ]
- if (len(new_bench_match) < 1):
- continue
- bench_diffs.append(BenchDiff(old_bench, new_bench_match[0]))
-
- if use_tabs:
- column_formats = {
- 'b' : '{bench}\t',
- 'c' : '{config}\t',
- 't' : '{time_type}\t',
- 'o' : '{old_time: 0.2f}\t',
- 'n' : '{new_time: 0.2f}\t',
- 'd' : '{diff: 0.2f}\t',
- 'p' : '{diffp: 0.1%}\t',
- }
- header_formats = {
- 'b' : '{bench}\t',
- 'c' : '{config}\t',
- 't' : '{time_type}\t',
- 'o' : '{old_time}\t',
- 'n' : '{new_time}\t',
- 'd' : '{diff}\t',
- 'p' : '{diffp}\t',
- }
- else:
- bench_max_len = max(map(lambda b: len(b.old.bench), bench_diffs))
- config_max_len = max(map(lambda b: len(b.old.config), bench_diffs))
- column_formats = {
- 'b' : '{bench: >%d} ' % (bench_max_len),
- 'c' : '{config: <%d} ' % (config_max_len),
- 't' : '{time_type: <4} ',
- 'o' : '{old_time: >10.2f} ',
- 'n' : '{new_time: >10.2f} ',
- 'd' : '{diff: >+10.2f} ',
- 'p' : '{diffp: >+8.1%} ',
- }
- header_formats = {
- 'b' : '{bench: >%d} ' % (bench_max_len),
- 'c' : '{config: <%d} ' % (config_max_len),
- 't' : '{time_type: <4} ',
- 'o' : '{old_time: >10} ',
- 'n' : '{new_time: >10} ',
- 'd' : '{diff: >10} ',
- 'p' : '{diffp: >8} ',
- }
-
- for column_char in columns:
- if column_formats[column_char]:
- column_format += column_formats[column_char]
- header_format += header_formats[column_char]
- else:
- usage()
- sys.exit(2)
-
- if header:
- print header_format.format(
- bench='bench'
- , config='conf'
- , time_type='time'
- , old_time='old'
- , new_time='new'
- , diff='diff'
- , diffp='diffP'
- )
-
- bench_diffs.sort(key=lambda d : [d.diffp,
- d.old.bench,
- d.old.config,
- d.old.time_type,
- ])
- for bench_diff in bench_diffs:
- print column_format.format(
- bench=bench_diff.old.bench.strip()
- , config=bench_diff.old.config.strip()
- , time_type=bench_diff.old.time_type
- , old_time=bench_diff.old.time
- , new_time=bench_diff.new.time
- , diff=bench_diff.diff
- , diffp=bench_diff.diffp
- )
-
-if __name__ == "__main__":
- main()
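
For reference, the core math the deleted bench_compare.py performed reduces
to a signed difference against the old time plus a division-guarded
percentage. A minimal standalone sketch (the helper name is hypothetical,
not part of the deleted module):

    def bench_diff(old_time, new_time):
        # Positive values mean the new run is faster, since diff = old - new.
        diff = old_time - new_time
        diffp = diff / old_time if old_time != 0 else 0.0
        return diff, diffp

    # bench_diff(10.0, 8.0) -> (2.0, 0.2): the new run is 20% faster.
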
diff --git a/bench/bench_util.py b/bench/bench_util.py
deleted file mode 100644
index b6fecb7..0000000
--- a/bench/bench_util.py
+++ /dev/null
@@ -1,356 +0,0 @@
-'''
-Created on May 19, 2011
-
-@author: bungeman
-'''
-
-import os
-import re
-import math
-
-# bench representation algorithm constant names
-ALGORITHM_AVERAGE = 'avg'
-ALGORITHM_MEDIAN = 'med'
-ALGORITHM_MINIMUM = 'min'
-ALGORITHM_25TH_PERCENTILE = '25th'
-
-# Regular expressions used throughout.
-PER_SETTING_RE = '([^\s=]+)(?:=(\S+))?'
-SETTINGS_RE = 'skia bench:((?:\s+' + PER_SETTING_RE + ')*)'
-BENCH_RE = 'running bench (?:\[\d+ \d+\] )?\s*(\S+)'
-TIME_RE = '(?:(\w*)msecs = )?\s*((?:\d+\.\d+)(?:,\s*\d+\.\d+)*)'
-# non-per-tile benches have configs that don't end with ']' or '>'
-CONFIG_RE = '(\S+[^\]>]):\s+((?:' + TIME_RE + '\s+)+)'
-# per-tile bench lines are in the following format. Note that there are
-# non-averaged bench numbers in separate lines, which we ignore now due to
-# their inaccuracy.
-TILE_RE = (' tile_(\S+): tile \[\d+,\d+\] out of \[\d+,\d+\] <averaged>:'
- ' ((?:' + TIME_RE + '\s+)+)')
-# for extracting tile layout
-TILE_LAYOUT_RE = ' out of \[(\d+),(\d+)\] <averaged>: '
-
-PER_SETTING_RE_COMPILED = re.compile(PER_SETTING_RE)
-SETTINGS_RE_COMPILED = re.compile(SETTINGS_RE)
-BENCH_RE_COMPILED = re.compile(BENCH_RE)
-TIME_RE_COMPILED = re.compile(TIME_RE)
-CONFIG_RE_COMPILED = re.compile(CONFIG_RE)
-TILE_RE_COMPILED = re.compile(TILE_RE)
-TILE_LAYOUT_RE_COMPILED = re.compile(TILE_LAYOUT_RE)
-
-class BenchDataPoint:
- """A single data point produced by bench.
- """
- def __init__(self, bench, config, time_type, time, settings,
- tile_layout='', per_tile_values=[], per_iter_time=[]):
- # string name of the benchmark to measure
- self.bench = bench
- # string name of the configurations to run
- self.config = config
- # type of the timer in string: '' (walltime), 'c' (cpu) or 'g' (gpu)
- self.time_type = time_type
- # float number of the bench time value
- self.time = time
- # dictionary of the run settings
- self.settings = settings
- # how tiles cover the whole picture: '5x3' means 5 columns and 3 rows
- self.tile_layout = tile_layout
- # list of float for per_tile bench values, if applicable
- self.per_tile_values = per_tile_values
- # list of float for per-iteration bench time, if applicable
- self.per_iter_time = per_iter_time
-
- def __repr__(self):
- return "BenchDataPoint(%s, %s, %s, %s, %s)" % (
- str(self.bench),
- str(self.config),
- str(self.time_type),
- str(self.time),
- str(self.settings),
- )
-
-class _ExtremeType(object):
- """Instances of this class compare greater or less than other objects."""
- def __init__(self, cmpr, rep):
- object.__init__(self)
- self._cmpr = cmpr
- self._rep = rep
-
- def __cmp__(self, other):
- if isinstance(other, self.__class__) and other._cmpr == self._cmpr:
- return 0
- return self._cmpr
-
- def __repr__(self):
- return self._rep
-
-Max = _ExtremeType(1, "Max")
-Min = _ExtremeType(-1, "Min")
-
-class _ListAlgorithm(object):
- """Algorithm for selecting the representation value from a given list.
- representation is one of the ALGORITHM_XXX representation types."""
- def __init__(self, data, representation=None):
- if not representation:
- representation = ALGORITHM_AVERAGE # default algorithm
- self._data = data
- self._len = len(data)
- if representation == ALGORITHM_AVERAGE:
- self._rep = sum(self._data) / self._len
- else:
- self._data.sort()
- if representation == ALGORITHM_MINIMUM:
- self._rep = self._data[0]
- else:
- # for percentiles, we use the value below which x% of values are
- # found, which allows for better detection of quantum behaviors.
- if representation == ALGORITHM_MEDIAN:
- x = int(round(0.5 * self._len + 0.5))
- elif representation == ALGORITHM_25TH_PERCENTILE:
- x = int(round(0.25 * self._len + 0.5))
- else:
- raise Exception("invalid representation algorithm %s!" %
- representation)
- self._rep = self._data[x - 1]
-
- def compute(self):
- return self._rep
-
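
A worked example of the index selection above: for ten sorted values, the
'25th' representation picks x = round(0.25 * 10 + 0.5) = 3, i.e. the
third-smallest value, and 'med' picks x = round(0.5 * 10 + 0.5) = 6, the
sixth-smallest. A minimal sketch of the same rule (hypothetical standalone
form, not part of the deleted module):

    def percentile_rep(data, fraction):
        # Value below which roughly `fraction` of the sorted data falls,
        # using the same rounding rule as the deleted _ListAlgorithm.
        ordered = sorted(data)
        x = int(round(fraction * len(ordered) + 0.5))
        return ordered[x - 1]

    # percentile_rep([5.0, 1.0, 3.0, 2.0], 0.25) -> 2.0
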
-def _ParseAndStoreTimes(config_re_compiled, is_per_tile, line, bench,
- value_dic, layout_dic):
- """Parses given bench time line with regex and adds data to value_dic.
-
- config_re_compiled: precompiled regular expression for parsing the config
- line.
- is_per_tile: boolean indicating whether this is a per-tile bench.
- If so, we add tile layout into layout_dic as well.
- line: input string line to parse.
- bench: name of bench for the time values.
- value_dic: dictionary to store bench values. See bench_dic in parse() below.
- layout_dic: dictionary to store tile layouts. See parse() for descriptions.
- """
-
- for config in config_re_compiled.finditer(line):
- current_config = config.group(1)
- tile_layout = ''
- if is_per_tile: # per-tile bench, add name prefix
- current_config = 'tile_' + current_config
- layouts = TILE_LAYOUT_RE_COMPILED.search(line)
- if layouts and len(layouts.groups()) == 2:
- tile_layout = '%sx%s' % layouts.groups()
- times = config.group(2)
- for new_time in TIME_RE_COMPILED.finditer(times):
- current_time_type = new_time.group(1)
- iters = [float(i) for i in
- new_time.group(2).strip().split(',')]
- value_dic.setdefault(bench, {}).setdefault(
- current_config, {}).setdefault(current_time_type, []).append(
- iters)
- layout_dic.setdefault(bench, {}).setdefault(
- current_config, {}).setdefault(current_time_type, tile_layout)
-
-def parse_skp_bench_data(directory, revision, rep, default_settings=None):
- """Parses all the skp bench data in the given directory.
-
- Args:
- directory: string of path to input data directory.
- revision: git hash revision that matches the data to process.
- rep: bench representation algorithm; see the ALGORITHM_* constants above.
- default_settings: dictionary of other run settings. See writer.option() in
- bench/benchmain.cpp.
-
- Returns:
- A list of BenchDataPoint objects.
- """
- revision_data_points = []
- file_list = os.listdir(directory)
- file_list.sort()
- for bench_file in file_list:
- scalar_type = None
- # Scalar type, if any, is in the bench filename after 'scalar_'.
- if (bench_file.startswith('bench_' + revision + '_data_')):
- if bench_file.find('scalar_') > 0:
- components = bench_file.split('_')
- scalar_type = components[components.index('scalar') + 1]
- else: # Skips non-SKP bench files.
- continue
-
- with open('/'.join([directory, bench_file]), 'r') as file_handle:
- settings = dict(default_settings or {})
- settings['scalar'] = scalar_type
- revision_data_points.extend(parse(settings, file_handle, rep))
-
- return revision_data_points
-
-# TODO(bensong): switch to reading JSON output when available. This way we don't
-# need the RE complexities.
-def parse(settings, lines, representation=None):
- """Parses bench output into a useful data structure.
-
- ({str:str}, __iter__ -> str) -> [BenchDataPoint]
- representation is one of the ALGORITHM_XXX types."""
-
- benches = []
- current_bench = None
- # [bench][config][time_type] -> [[per-iter values]] where per-tile config
- # has per-iter value list for each tile [[<tile1_iter1>,<tile1_iter2>,...],
- # [<tile2_iter1>,<tile2_iter2>,...],...], while non-per-tile config only
- # contains one list of iterations [[iter1, iter2, ...]].
- bench_dic = {}
- # [bench][config][time_type] -> tile_layout
- layout_dic = {}
-
- for line in lines:
-
- # see if this line is a settings line
- settingsMatch = SETTINGS_RE_COMPILED.search(line)
- if (settingsMatch):
- settings = dict(settings)
- for settingMatch in PER_SETTING_RE_COMPILED.finditer(settingsMatch.group(1)):
- if (settingMatch.group(2)):
- settings[settingMatch.group(1)] = settingMatch.group(2)
- else:
- settings[settingMatch.group(1)] = True
-
- # see if this line starts a new bench
- new_bench = BENCH_RE_COMPILED.search(line)
- if new_bench:
- current_bench = new_bench.group(1)
-
- # add configs on this line to the bench_dic
- if current_bench:
- if line.startswith(' tile_') :
- _ParseAndStoreTimes(TILE_RE_COMPILED, True, line, current_bench,
- bench_dic, layout_dic)
- else:
- _ParseAndStoreTimes(CONFIG_RE_COMPILED, False, line,
- current_bench, bench_dic, layout_dic)
-
- # append benches to list
- for bench in bench_dic:
- for config in bench_dic[bench]:
- for time_type in bench_dic[bench][config]:
- tile_layout = ''
- per_tile_values = [] # empty for non-per-tile configs
- per_iter_time = [] # empty for per-tile configs
- bench_summary = None # a single final bench value
- if len(bench_dic[bench][config][time_type]) > 1:
- # per-tile config; compute representation for each tile
- per_tile_values = [
- _ListAlgorithm(iters, representation).compute()
- for iters in bench_dic[bench][config][time_type]]
- # use sum of each tile representation for total bench value
- bench_summary = sum(per_tile_values)
- # extract tile layout
- tile_layout = layout_dic[bench][config][time_type]
- else:
- # get the list of per-iteration values
- per_iter_time = bench_dic[bench][config][time_type][0]
- bench_summary = _ListAlgorithm(
- per_iter_time, representation).compute()
- benches.append(BenchDataPoint(
- bench,
- config,
- time_type,
- bench_summary,
- settings,
- tile_layout,
- per_tile_values,
- per_iter_time))
-
- return benches
-
-class LinearRegression:
- """Linear regression data based on a set of data points.
-
- ([(Number,Number)])
- There must be at least two points for this to make sense."""
- def __init__(self, points):
- n = len(points)
- max_x = Min
- min_x = Max
-
- Sx = 0.0
- Sy = 0.0
- Sxx = 0.0
- Sxy = 0.0
- Syy = 0.0
- for point in points:
- x = point[0]
- y = point[1]
- max_x = max(max_x, x)
- min_x = min(min_x, x)
-
- Sx += x
- Sy += y
- Sxx += x*x
- Sxy += x*y
- Syy += y*y
-
- denom = n*Sxx - Sx*Sx
- if (denom != 0.0):
- B = (n*Sxy - Sx*Sy) / denom
- else:
- B = 0.0
- a = (1.0/n)*(Sy - B*Sx)
-
- se2 = 0
- sB2 = 0
- sa2 = 0
- if (n >= 3 and denom != 0.0):
- se2 = (1.0/(n*(n-2)) * (n*Syy - Sy*Sy - B*B*denom))
- sB2 = (n*se2) / denom
- sa2 = sB2 * (1.0/n) * Sxx
-
-
- self.slope = B
- self.intercept = a
- self.serror = math.sqrt(max(0, se2))
- self.serror_slope = math.sqrt(max(0, sB2))
- self.serror_intercept = math.sqrt(max(0, sa2))
- self.max_x = max_x
- self.min_x = min_x
-
- def __repr__(self):
- return "LinearRegression(%s, %s, %s, %s, %s)" % (
- str(self.slope),
- str(self.intercept),
- str(self.serror),
- str(self.serror_slope),
- str(self.serror_intercept),
- )
-
- def find_min_slope(self):
- """Finds the minimal slope given one standard deviation."""
- slope = self.slope
- intercept = self.intercept
- error = self.serror
- regr_start = self.min_x
- regr_end = self.max_x
- regr_width = regr_end - regr_start
-
- if slope < 0:
- lower_left_y = slope*regr_start + intercept - error
- upper_right_y = slope*regr_end + intercept + error
- return min(0, (upper_right_y - lower_left_y) / regr_width)
-
- elif slope > 0:
- upper_left_y = slope*regr_start + intercept + error
- lower_right_y = slope*regr_end + intercept - error
- return max(0, (lower_right_y - upper_left_y) / regr_width)
-
- return 0
-
-def CreateRevisionLink(revision_number):
- """Returns HTML displaying the given revision number and linking to
- that revision's change page at code.google.com, e.g.
- http://code.google.com/p/skia/source/detail?r=2056
- """
- return '<a href="http://code.google.com/p/skia/source/detail?r=%s">%s</a>'%(
- revision_number, revision_number)
-
-def main():
- foo = [[0.0, 0.0], [0.0, 1.0], [0.0, 2.0], [0.0, 3.0]]
- LinearRegression(foo)
-
-if __name__ == "__main__":
- main()
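
The regression kept by the deleted bench_util.py is the standard
closed-form least-squares fit over the running sums Sx, Sy, Sxx and Sxy.
A minimal sketch of the same slope/intercept formulas (hypothetical
function name; the standard-error terms are omitted):

    def least_squares(points):
        # slope B = (n*Sxy - Sx*Sy) / (n*Sxx - Sx*Sx) and
        # intercept a = (Sy - B*Sx) / n, as in LinearRegression.__init__.
        n = len(points)
        Sx = sum(x for x, _ in points)
        Sy = sum(y for _, y in points)
        Sxx = sum(x * x for x, _ in points)
        Sxy = sum(x * y for x, y in points)
        denom = n * Sxx - Sx * Sx
        slope = (n * Sxy - Sx * Sy) / denom if denom != 0.0 else 0.0
        intercept = (Sy - slope * Sx) / n
        return slope, intercept

    # least_squares([(0.0, 1.0), (1.0, 3.0), (2.0, 5.0)]) -> (2.0, 1.0)
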
diff --git a/bench/gen_bench_expectations.py b/bench/gen_bench_expectations.py
deleted file mode 100644
index 4edc38c..0000000
--- a/bench/gen_bench_expectations.py
+++ /dev/null
@@ -1,223 +0,0 @@
-#!/usr/bin/env python
-# Copyright (c) 2014 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-""" Generate bench_expectations file from a given set of bench data files. """
-
-import argparse
-import bench_util
-import json
-import os
-import re
-import sys
-import urllib2
-
-# Parameters for calculating bench ranges.
-RANGE_RATIO_UPPER = 1.5 # Ratio of range for upper bounds.
-RANGE_RATIO_LOWER = 2.0 # Ratio of range for lower bounds.
-ERR_RATIO = 0.08 # Further widens the range by the ratio of average value.
-ERR_UB = 1.0 # Adds an absolute upper error to cope with small benches.
-ERR_LB = 1.5 # Adds an absolute lower error, likewise for small benches.
-
-# List of bench configs to monitor. Ignore all other configs.
-CONFIGS_TO_INCLUDE = ['simple_viewport_1000x1000',
- 'simple_viewport_1000x1000_angle',
- 'simple_viewport_1000x1000_gpu',
- 'simple_viewport_1000x1000_scalar_1.100000',
- 'simple_viewport_1000x1000_scalar_1.100000_gpu',
- ]
-
-# List of flaky entries that should be excluded. Each entry is defined by a list
-# of 3 strings, corresponding to the substrings of [bench, config, builder] to
-# search for. A bench expectations line is excluded when each of the 3 strings
-# in the list is a substring of the corresponding element of the given line. For
-# instance, ['desk_yahooanswers', 'gpu', 'Ubuntu'] will skip expectation entries
-# of SKP benches whose name contains 'desk_yahooanswers' on all gpu-related
-# configs of all Ubuntu builders.
-ENTRIES_TO_EXCLUDE = [
- ]
-
-_GS_CLOUD_FORMAT = 'http://storage.googleapis.com/chromium-skia-gm/perfdata/%s/%s'
-
-def compute_ranges(benches, more_benches=None):
- """Given a list of bench numbers, calculate the alert range.
-
- Args:
- benches: a list of float bench values.
- more_benches: a list of (index, values) tuples of additional bench values.
- The first element of each tuple is the number of commits before the
- current one at which that set of values was collected, and the second
- element is a list of bench results.
-
- Returns:
- a list of float [lower_bound, upper_bound].
- """
- avg = sum(benches)/len(benches)
- minimum = min(benches)
- maximum = max(benches)
- diff = maximum - minimum
-
- return [minimum - diff*RANGE_RATIO_LOWER - avg*ERR_RATIO - ERR_LB,
- maximum + diff*RANGE_RATIO_UPPER + avg*ERR_RATIO + ERR_UB]
-
-
-def create_expectations_dict(revision_data_points, builder, extra_data=None):
- """Convert list of bench data points into a dictionary of expectations data.
-
- Args:
- revision_data_points: a list of BenchDataPoint objects.
- builder: string of the corresponding buildbot builder name.
- extra_data: a list of (index, BenchDataPoint list) tuples holding data
- from earlier commits.
-
- Returns:
- a dictionary of this form:
- keys = tuple of (config, bench) strings.
- values = list of float [expected, lower_bound, upper_bound] for the key.
- """
- bench_dict = {}
- for point in revision_data_points:
- if (point.time_type or # Not walltime which has time_type ''
- not point.config in CONFIGS_TO_INCLUDE):
- continue
- to_skip = False
- for bench_substr, config_substr, builder_substr in ENTRIES_TO_EXCLUDE:
- if (bench_substr in point.bench and config_substr in point.config and
- builder_substr in builder):
- to_skip = True
- break
- if to_skip:
- continue
- key = (point.config, point.bench)
-
- extras = []
- for idx, dataset in extra_data:
- for data in dataset:
- if (data.bench == point.bench and data.config == point.config and
- data.time_type == point.time_type and data.per_iter_time):
- extras.append((idx, data.per_iter_time))
-
- if key in bench_dict:
- raise Exception('Duplicate bench entry: ' + str(key))
- bench_dict[key] = [point.time] + compute_ranges(point.per_iter_time, extras)
-
- return bench_dict
-
-
-def get_parent_commits(start_hash, num_back):
- """Returns a list of commits that are the parent of the commit passed in."""
- list_commits = urllib2.urlopen(
- 'https://skia.googlesource.com/skia/+log/%s?format=json&n=%d' %
- (start_hash, num_back))
- # NOTE: Very brittle. Strips the four-character anti-XSSI prefix that
- # Gitiles prepends so the JSON can be parsed successfully.
- trunc_list = list_commits.read()[4:]
- json_data = json.loads(trunc_list)
- return [revision['commit'] for revision in json_data['log']]
-
-
-def get_file_suffixes(commit_hash, directory):
- """Gets all the suffixes available in the directory"""
- possible_files = os.listdir(directory)
- prefix = 'bench_' + commit_hash + '_data_'
- return [name[len(prefix):] for name in possible_files
- if name.startswith(prefix)]
-
-
-def download_bench_data(builder, commit_hash, suffixes, directory):
- """Downloads data, returns the number successfully downloaded"""
- cur_files = os.listdir(directory)
- count = 0
- for suffix in suffixes:
- file_name = 'bench_'+commit_hash+'_data_'+suffix
- if file_name in cur_files:
- continue
- try:
- src = urllib2.urlopen(_GS_CLOUD_FORMAT % (builder, file_name))
- with open(os.path.join(directory, file_name), 'w') as dest:
- dest.writelines(src)
- count += 1
- except urllib2.HTTPError:
- pass
- return count
-
-
-def main():
- """Reads bench data points, then calculate and export expectations.
- """
- parser = argparse.ArgumentParser()
- parser.add_argument(
- '-a', '--representation_alg', default='25th',
- help='bench representation algorithm to use, see bench_util.py.')
- parser.add_argument(
- '-b', '--builder', required=True,
- help='name of the builder whose bench ranges we are computing.')
- parser.add_argument(
- '-d', '--input_dir', required=True,
- help='a directory containing bench data files.')
- parser.add_argument(
- '-o', '--output_file', required=True,
- help='file path and name for storing the output bench expectations.')
- parser.add_argument(
- '-r', '--git_revision', required=True,
- help='the git hash to indicate the revision of input data to use.')
- parser.add_argument(
- '-t', '--back_track', required=False, type=int, default=10,
- help='the number of commit hashes to look backwards to include ' +
- 'in the calculations.')
- parser.add_argument(
- '-m', '--max_commits', required=False, type=int, default=1,
- help='the number of commit hashes to include in the calculations.')
- args = parser.parse_args()
-
- builder = args.builder
-
- data_points = bench_util.parse_skp_bench_data(
- args.input_dir, args.git_revision, args.representation_alg)
-
- parent_commits = get_parent_commits(args.git_revision, args.back_track)
- print "Using commits: {}".format(parent_commits)
- suffixes = get_file_suffixes(args.git_revision, args.input_dir)
- print "Using suffixes: {}".format(suffixes)
-
- # TODO(kelvinly): Find a better approach than directly copying from
- # the GS server?
- downloaded_commits = []
- for idx, commit in enumerate(parent_commits):
- num_downloaded = download_bench_data(
- builder, commit, suffixes, args.input_dir)
- if num_downloaded > 0:
- downloaded_commits.append((num_downloaded, idx, commit))
-
- if len(downloaded_commits) < args.max_commits:
- print ('Fewer than the desired number of commits were found. Please '
- 'increase --back_track in later runs.')
- trunc_commits = sorted(downloaded_commits, reverse=True)[:args.max_commits]
- extra_data = []
- for _, idx, commit in trunc_commits:
- extra_data.append((idx, bench_util.parse_skp_bench_data(
- args.input_dir, commit, args.representation_alg)))
-
- expectations_dict = create_expectations_dict(data_points, builder,
- extra_data)
-
- out_lines = []
- keys = expectations_dict.keys()
- keys.sort()
- for (config, bench) in keys:
- (expected, lower_bound, upper_bound) = expectations_dict[(config, bench)]
- out_lines.append('%(bench)s_%(config)s_,%(builder)s-%(representation)s,'
- '%(expected)s,%(lower_bound)s,%(upper_bound)s' % {
- 'bench': bench,
- 'config': config,
- 'builder': builder,
- 'representation': args.representation_alg,
- 'expected': expected,
- 'lower_bound': lower_bound,
- 'upper_bound': upper_bound})
-
- with open(args.output_file, 'w') as file_handle:
- file_handle.write('\n'.join(out_lines))
-
-
-if __name__ == "__main__":
- main()
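
As a worked example of the bounds the deleted compute_ranges() produced,
using the constants defined at the top of the file (RANGE_RATIO_LOWER=2.0,
RANGE_RATIO_UPPER=1.5, ERR_RATIO=0.08, ERR_LB=1.5, ERR_UB=1.0):

    benches = [10.0, 11.0, 12.0]  # avg = 11.0, min = 10.0, max = 12.0, diff = 2.0
    lower = 10.0 - 2.0 * 2.0 - 11.0 * 0.08 - 1.5  # = 3.62
    upper = 12.0 + 2.0 * 1.5 + 11.0 * 0.08 + 1.0  # = 16.88
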
diff --git a/bench/tile_analyze.py b/bench/tile_analyze.py
deleted file mode 100755
index 03fe086..0000000
--- a/bench/tile_analyze.py
+++ /dev/null
@@ -1,279 +0,0 @@
-#!/usr/bin/env python
-# Copyright (c) 2013 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be found
-# in the LICENSE file.
-
-""" Analyze per-tile and viewport bench data, and output visualized results.
-"""
-
-__author__ = 'bensong@google.com (Ben Chen)'
-
-import bench_util
-import boto
-import math
-import optparse
-import os
-import re
-import shutil
-
-from oauth2_plugin import oauth2_plugin
-
-# The default platform to analyze. Used when OPTION_PLATFORM flag is not set.
-DEFAULT_PLATFORM = 'Nexus10_4-1_Float_Bench_32'
-
-# Template for gsutil uri.
-GOOGLE_STORAGE_URI_SCHEME = 'gs'
-URI_BUCKET = 'chromium-skia-gm'
-
-# Maximum number of rows of tiles to track for viewport covering.
-MAX_TILE_ROWS = 8
-
-# Constants for optparse.
-USAGE_STRING = 'USAGE: %s [options]'
-HOWTO_STRING = """
-Note: to read bench data stored in Google Storage, you will need to set up the
-corresponding Python library.
-See http://developers.google.com/storage/docs/gspythonlibrary for details.
-"""
-HELP_STRING = """
-For the given platform and revision number, find corresponding viewport and
-tile benchmarks for each available picture bench, and output visualization and
-analysis in HTML. By default it reads from Skia's Google Storage location where
-bot data are stored, but if --dir is given, will read from local directory
-instead.
-""" + HOWTO_STRING
-
-OPTION_DIR = '--dir'
-OPTION_DIR_SHORT = '-d'
-OPTION_REVISION = '--rev'
-OPTION_REVISION_SHORT = '-r'
-OPTION_PLATFORM = '--platform'
-OPTION_PLATFORM_SHORT = '-p'
-# Bench representation algorithm flag.
-OPTION_REPRESENTATION_ALG = '--algorithm'
-OPTION_REPRESENTATION_ALG_SHORT = '-a'
-
-# Bench representation algorithm. See trunk/bench/bench_util.py.
-REPRESENTATION_ALG = bench_util.ALGORITHM_25TH_PERCENTILE
-
-# Constants for bench file matching.
-GOOGLE_STORAGE_OBJECT_NAME_PREFIX = 'perfdata/Skia_'
-BENCH_FILE_PREFIX_TEMPLATE = 'bench_r%s_'
-TILING_FILE_NAME_INDICATOR = '_tile_'
-VIEWPORT_FILE_NAME_INDICATOR = '_viewport_'
-
-# Regular expression for matching format '<integer>x<integer>'.
-DIMENSIONS_RE = '(\d+)x(\d+)'
-
-# HTML and JS output templates.
-HTML_PREFIX = """
-<html><head><script type="text/javascript" src="https://www.google.com/jsapi">
-</script><script type="text/javascript">google.load("visualization", "1.1",
-{packages:["table"]});google.load("prototype", "1.6");</script>
-<script type="text/javascript" src="https://systemsbiology-visualizations.googlecode.com/svn/trunk/src/main/js/load.js"></script><script
-type="text/javascript"> systemsbiology.load("visualization", "1.0",
-{packages:["bioheatmap"]});</script><script type="text/javascript">
-google.setOnLoadCallback(drawVisualization); function drawVisualization() {
-"""
-HTML_SUFFIX = '</body></html>'
-BAR_CHART_TEMPLATE = ('<img src="https://chart.googleapis.com/chart?chxr=0,0,'
- '300&chxt=x&chbh=15,0&chs=600x150&cht=bhg&chco=80C65A,224499,FF0000,0A8C8A,'
- 'EBB671,DE091A,000000,00ffff&chds=a&chdl=%s&chd=t:%s" /><br>\n')
-DRAW_OPTIONS = ('{passThroughBlack:false,useRowLabels:false,cellWidth:30,'
- 'cellHeight:30}')
-TABLE_OPTIONS = '{showRowNumber:true,firstRowNumber:" ",sort:"disable"}'
-
-def GetFiles(rev, bench_dir, platform):
- """Reads in bench files of interest into a dictionary.
-
- If bench_dir is not empty, tries to read in local bench files; otherwise checks
- Google Storage. Filters files by revision (rev) and platform, and ignores
- non-tile, non-viewport bench files.
- Outputs dictionary [filename] -> [file content].
- """
- file_dic = {}
- if not bench_dir:
- uri = boto.storage_uri(URI_BUCKET, GOOGLE_STORAGE_URI_SCHEME)
- # The boto API does not allow prefix/wildcard matching of Google Storage
- # objects. And Google Storage has a flat structure instead of being
- # organized in directories. Therefore, we have to scan all objects in the
- # Google Storage bucket to find the files we need, which is slow.
- # The option of implementing prefix matching as in gsutil seems to be
- # overkill, but gsutil does not provide an API ready for use. If speed is a
- # big concern, we suggest copying bot bench data from Google Storage using
- # gsutil and use --log_dir for fast local data reading.
- for obj in uri.get_bucket():
- # Filters out files of no interest.
- if (not obj.name.startswith(GOOGLE_STORAGE_OBJECT_NAME_PREFIX) or
- (obj.name.find(TILING_FILE_NAME_INDICATOR) < 0 and
- obj.name.find(VIEWPORT_FILE_NAME_INDICATOR) < 0) or
- obj.name.find(platform) < 0 or
- obj.name.find(BENCH_FILE_PREFIX_TEMPLATE % rev) < 0):
- continue
- file_dic[
- obj.name[obj.name.rfind('/') + 1 : ]] = obj.get_contents_as_string()
- else:
- for f in os.listdir(bench_dir):
- if (not os.path.isfile(os.path.join(bench_dir, f)) or
- (f.find(TILING_FILE_NAME_INDICATOR) < 0 and
- f.find(VIEWPORT_FILE_NAME_INDICATOR) < 0) or
- not f.startswith(BENCH_FILE_PREFIX_TEMPLATE % rev)):
- continue
- file_dic[f] = open(os.path.join(bench_dir, f)).read()
-
- if not file_dic:
- raise Exception('No bench file found in "%s" or Google Storage.' %
- bench_dir)
-
- return file_dic
-
-def GetTileMatrix(layout, tile_size, values, viewport):
- """For the given tile layout and per-tile bench values, returns a matrix of
- bench values with tiles outside the given viewport set to 0.
-
- layout, tile_size and viewport are given in string of format <w>x<h>, where
- <w> is viewport width or number of tile columns, and <h> is viewport height or
- number of tile rows. We truncate tile rows to MAX_TILE_ROWS to adjust for very
- long SKPs.
-
- values: per-tile benches ordered row-by-row, starting from the top-left tile.
-
- Returns [sum, matrix] where sum is the total bench tile time that covers the
- viewport, and matrix is used for visualizing the tiles.
- """
- [tile_cols, tile_rows] = [int(i) for i in layout.split('x')]
- [tile_x, tile_y] = [int(i) for i in tile_size.split('x')]
- [viewport_x, viewport_y] = [int(i) for i in viewport.split('x')]
- viewport_cols = int(math.ceil(viewport_x * 1.0 / tile_x))
- viewport_rows = int(math.ceil(viewport_y * 1.0 / tile_y))
- truncated_tile_rows = min(tile_rows, MAX_TILE_ROWS)
-
- viewport_tile_sum = 0
- matrix = [[0 for y in range(tile_cols)] for x in range(truncated_tile_rows)]
- for y in range(min(viewport_cols, tile_cols)):
- for x in range(min(truncated_tile_rows, viewport_rows)):
- matrix[x][y] = values[x * tile_cols + y]
- viewport_tile_sum += values[x * tile_cols + y]
-
- return [viewport_tile_sum, matrix]
-
-def GetTileVisCodes(suffix, matrix):
- """Generates and returns strings of [js_codes, row1, row2] which are codes for
- visualizing the benches from the given tile config and matrix data.
- row1 is used for the first row of heatmaps; row2 is for corresponding tables.
- suffix is only used to avoid name conflicts in the whole html output.
- """
- this_js = 'var data_%s=new google.visualization.DataTable();' % suffix
- for i in range(len(matrix[0])):
- this_js += 'data_%s.addColumn("number","%s");' % (suffix, i)
- this_js += 'data_%s.addRows(%s);' % (suffix, str(matrix))
- # Adds heatmap chart.
- this_js += ('var heat_%s=new org.systemsbiology.visualization' % suffix +
- '.BioHeatMap(document.getElementById("%s"));' % suffix +
- 'heat_%s.draw(data_%s,%s);' % (suffix, suffix, DRAW_OPTIONS))
- # Adds data table chart.
- this_js += ('var table_%s=new google.visualization.Table(document.' % suffix +
- 'getElementById("t%s"));table_%s.draw(data_%s,%s);\n' % (
- suffix, suffix, suffix, TABLE_OPTIONS))
- table_row1 = '<td>%s<div id="%s"></div></td>' % (suffix, suffix)
- table_row2 = '<td><div id="t%s"></div></td>' % suffix
-
- return [this_js, table_row1, table_row2]
-
-def OutputTileAnalysis(rev, representation_alg, bench_dir, platform):
- """Reads skp bench data and outputs tile vs. viewport analysis for the given
- platform.
-
- Ignores data with revisions other than rev. If bench_dir is not empty, read
- from the local directory instead of Google Storage.
- Uses the provided representation_alg for calculating bench representations.
-
- Returns (js_codes, body_codes): strings of js/html codes for stats and
- visualization.
- """
- js_codes = ''
- body_codes = ('}</script></head><body>'
- '<h3>PLATFORM: %s REVISION: %s</h3><br>' % (platform, rev))
- bench_dic = {} # [bench][config] -> [layout, [values]]
- file_dic = GetFiles(rev, bench_dir, platform)
- for f in file_dic:
- for point in bench_util.parse('', file_dic[f].split('\n'),
- representation_alg):
- if point.time_type: # Ignores non-walltime time_type.
- continue
- bench = point.bench.replace('.skp', '')
- config = point.config.replace('simple_', '')
- components = config.split('_')
- if components[0] == 'viewport':
- bench_dic.setdefault(bench, {})[config] = [components[1], [point.time]]
- else: # Stores per-tile benches.
- bench_dic.setdefault(bench, {})[config] = [
- point.tile_layout, point.per_tile_values]
- benches = bench_dic.keys()
- benches.sort()
- for bench in benches:
- body_codes += '<h4>%s</h4><br><table><tr>' % bench
- heat_plots = '' # For table row of heatmap plots.
- table_plots = '' # For table row of data table plots.
- # For bar plot legends and values in URL string.
- legends = ''
- values = ''
- keys = bench_dic[bench].keys()
- keys.sort()
- if not keys[-1].startswith('viewport'): # No viewport to analyze; skip.
- continue
- else:
- # Extracts viewport size, which for all viewport configs is the same.
- viewport = bench_dic[bench][keys[-1]][0]
- for config in keys:
- [layout, value_li] = bench_dic[bench][config]
- if config.startswith('tile_'): # For per-tile data, visualize tiles.
- tile_size = config.split('_')[1]
- if (not re.search(DIMENSIONS_RE, layout) or
- not re.search(DIMENSIONS_RE, tile_size) or
- not re.search(DIMENSIONS_RE, viewport)):
- continue # Skip unrecognized formats.
- [viewport_tile_sum, matrix] = GetTileMatrix(
- layout, tile_size, value_li, viewport)
- values += '%s|' % viewport_tile_sum
- [this_js, row1, row2] = GetTileVisCodes(config + '_' + bench, matrix)
- heat_plots += row1
- table_plots += row2
- js_codes += this_js
- else: # For viewport data, there is only one element in value_li.
- values += '%s|' % sum(value_li)
- legends += '%s:%s|' % (config, sum(value_li))
- body_codes += (heat_plots + '</tr><tr>' + table_plots + '</tr></table>' +
- '<br>' + BAR_CHART_TEMPLATE % (legends[:-1], values[:-1]))
-
- return (js_codes, body_codes)
-
-def main():
- """Parses flags and outputs expected Skia picture bench results."""
- parser = optparse.OptionParser(USAGE_STRING % '%prog' + HELP_STRING)
- parser.add_option(OPTION_PLATFORM_SHORT, OPTION_PLATFORM,
- dest='plat', default=DEFAULT_PLATFORM,
- help='Platform to analyze. Defaults to DEFAULT_PLATFORM if not given.')
- parser.add_option(OPTION_REVISION_SHORT, OPTION_REVISION,
- dest='rev',
- help='(Mandatory) revision number to analyze.')
- parser.add_option(OPTION_DIR_SHORT, OPTION_DIR,
- dest='log_dir', default='',
- help=('(Optional) local directory where bench log files reside. If left '
- 'empty (by default), will try to read from Google Storage.'))
- parser.add_option(OPTION_REPRESENTATION_ALG_SHORT, OPTION_REPRESENTATION_ALG,
- dest='alg', default=REPRESENTATION_ALG,
- help=('Bench representation algorithm. '
- 'Default to "%s".' % REPRESENTATION_ALG))
- (options, args) = parser.parse_args()
- if not (options.rev and options.rev.isdigit()):
- parser.error('Please provide the mandatory flag %s with a numeric revision' % OPTION_REVISION)
- return
- rev = int(options.rev)
- (js_codes, body_codes) = OutputTileAnalysis(
- rev, options.alg, options.log_dir, options.plat)
- print HTML_PREFIX + js_codes + body_codes + HTML_SUFFIX
-
-
-if '__main__' == __name__:
- main()
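
For reference, the viewport coverage in the deleted GetTileMatrix() boils
down to summing the per-tile times (row-major, top-left tile first) for
every tile the viewport touches. A minimal sketch under the same
conventions (hypothetical standalone form; the MAX_TILE_ROWS truncation is
omitted):

    import math

    def viewport_tile_sum(values, tile_cols, tile_rows, tile_w, tile_h,
                          vp_w, vp_h):
        # Number of tile columns/rows needed to cover the viewport,
        # capped by the actual tile layout.
        cols = min(int(math.ceil(vp_w / float(tile_w))), tile_cols)
        rows = min(int(math.ceil(vp_h / float(tile_h))), tile_rows)
        total = 0.0
        for r in range(rows):
            for c in range(cols):
                total += values[r * tile_cols + c]
        return total

    # A 1000x1000 viewport over 256x256 tiles in a 5x4 layout sums the
    # first 4 columns of each of the 4 rows.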