Ravi Mistry | 5cc084b | 2019-07-25 13:47:36 -0400 | [diff] [blame] | 1 | #!/usr/bin/python2 |
| 2 | # |
| 3 | # Copyright 2019 Google Inc. |
| 4 | # |
| 5 | # Use of this source code is governed by a BSD-style license that can be |
| 6 | # found in the LICENSE file. |
| 7 | # |
# Helper script that takes as input two CSVs downloaded from perf.skia.org and
# outputs a CSV with the columns test_name, csv1 (average value from the first
# CSV), csv2 (average value from the second CSV) and perc_diff (percentage
# difference between the two averages).
# Before averaging, this script discards the NUM_OUTLIERS_TO_REMOVE smallest
# values and the NUM_OUTLIERS_TO_REMOVE largest values of each test.
| 13 | |
| 14 | |
| 15 | import csv |
| 16 | import optparse |
| 17 | import sys |
| 18 | import re |
| 19 | |
| 20 | |
# Placeholder stored in the combined results (and therefore written to the
# output CSV) when a test has no average in one of the two inputs, and for the
# percentage difference that cannot be computed in that case.
MISSING_STR = 'N/A'
# How many of the smallest and how many of the largest samples are dropped
# from each test's sorted values before the average is taken.
NUM_OUTLIERS_TO_REMOVE = 2
| 23 | |
| 24 | |
def read_from_csv(csv_file, num_outliers_to_remove=None):
    """Map each test name in a perf CSV to the trimmed mean of its samples.

    The first row must be a header whose first cell is 'id'. Every following
    row holds a trace id in its first cell (the test name is taken from the
    trailing ',test=<name>,' part of the id) followed by the sample values.

    For each row the samples are sorted, the `num_outliers_to_remove` smallest
    and largest samples are dropped, and the remainder is averaged. If several
    rows yield the same test name, the last one wins.

    Args:
        csv_file: Path of the CSV file to read.
        num_outliers_to_remove: Number of samples to drop from each end of the
            sorted samples. Defaults to NUM_OUTLIERS_TO_REMOVE when None.

    Returns:
        A dict mapping test name to average value (float). Rows that have no
        samples left after outlier removal are reported on stderr and omitted.

    Raises:
        Exception: If the header is missing or does not start with 'id', or
            if a row's id does not contain a test name.
        ValueError: If a non-blank sample cell is not a number.
    """
    if num_outliers_to_remove is None:
        num_outliers_to_remove = NUM_OUTLIERS_TO_REMOVE

    # The csv module expects a binary file on Python 2 and a text file opened
    # with newline='' on Python 3.
    if sys.version_info[0] >= 3:
        f = open(csv_file, 'r', newline='')
    else:
        f = open(csv_file, 'rb')

    test_to_avg = {}
    test_name_re = re.compile('^.*,test=(.*),$')
    with f:
        csv_reader = csv.reader(f, delimiter=',')
        # First row should contain headers. Validate that it does.
        header_row = next(csv_reader, None)
        if not header_row or header_row[0] != 'id':
            raise Exception('%s in unexpected format' % csv_file)

        for row in csv_reader:
            # csv.reader yields an empty list for blank lines.
            if not row:
                continue

            # Extract the test name.
            match = test_name_re.search(row[0])
            if match is None:
                raise Exception('%s in unexpected format: no test name in %r'
                                % (csv_file, row[0]))
            test_name = match.group(1)

            # Blank cells are treated as missing samples and ignored.
            vals = sorted(float(cell) for cell in row[1:] if cell.strip())
            # Discard outliers. The end index is spelled out because
            # vals[n:-n] would be empty when n == 0.
            vals = vals[num_outliers_to_remove:
                        len(vals) - num_outliers_to_remove]
            if not vals:
                # Too few samples to average; skip this test instead of
                # aborting the whole comparison.
                sys.stderr.write(
                    'Skipping %s in %s: no samples left after discarding '
                    '%d outlier(s) from each end\n'
                    % (test_name, csv_file, num_outliers_to_remove))
                continue

            # Find the avg val.
            test_to_avg[test_name] = sum(vals) / float(len(vals))
    return test_to_avg
| 47 | |
| 48 | |
def combine_results(d1, d2):
    """Merge two test->average dicts into one dict of comparison records.

    Args:
        d1: Dict mapping test name to the average from the first CSV.
        d2: Dict mapping test name to the average from the second CSV.

    Returns:
        A dict mapping test name to a record with the keys 'test_name',
        'csv1', 'csv2' and 'perc_diff'. A value that is absent from one input
        is recorded as MISSING_STR, and so is 'perc_diff' in that case.
        Otherwise 'perc_diff' is the difference (second minus first) expressed
        as a percentage of the mean of the two values, or 0 when that mean
        is 0.
    """
    combined = {}

    # Every test known to the first input, paired with the second if present.
    for name in d1:
        first = d1[name]
        second = d2.get(name, MISSING_STR)
        if second == MISSING_STR:
            delta_pct = MISSING_STR
        else:
            mean = (second + first)/2
            delta_pct = (second - first)/mean * 100 if mean != 0 else 0
        combined[name] = dict(
            test_name=name,
            csv1=first,
            csv2=second,
            perc_diff=delta_pct,
        )

    # Tests that only the second input knows about.
    for name, second in d2.items():
        if name not in combined:
            combined[name] = dict(
                test_name=name,
                csv1=MISSING_STR,
                csv2=second,
                perc_diff=MISSING_STR,
            )

    return combined
| 78 | |
| 79 | |
def write_to_csv(output_dict, output_csv):
    """Write the comparison records to a CSV file, sorted by test name.

    Args:
        output_dict: Dict mapping test name to a record dict with the keys
            'test_name', 'csv1', 'csv2' and 'perc_diff'.
        output_csv: Path of the CSV file to create (overwritten if present).
    """
    # The csv module expects a binary file on Python 2 and a text file opened
    # with newline='' on Python 3; a plain text-mode file can end up with
    # doubled carriage returns on platforms that translate line endings.
    if sys.version_info[0] >= 3:
        f = open(output_csv, 'w', newline='')
    else:
        f = open(output_csv, 'wb')

    with f:
        fieldnames = ['test_name', 'csv1', 'csv2', 'perc_diff']
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        # sorted() works on both Python 2 and 3, unlike dict.keys().sort().
        for test in sorted(output_dict):
            writer.writerow(output_dict[test])
| 89 | |
| 90 | |
def parse_and_output(csv1, csv2, output_csv):
    """Read both input CSVs, combine their averages and write the result.

    Args:
        csv1: Path of the first input CSV.
        csv2: Path of the second input CSV.
        output_csv: Path of the CSV file to write.
    """
    # Read the inputs in order: first csv1, then csv2.
    averages = [read_from_csv(path) for path in (csv1, csv2)]
    write_to_csv(combine_results(*averages), output_csv)
| 96 | |
| 97 | |
def main():
    """Parse the command line and run the CSV comparison.

    All of --csv1, --csv2 and --output_csv are required. If any is missing,
    a usage message is printed and the process exits with status 2 instead of
    failing later with a traceback when the missing path is opened.
    """
    option_parser = optparse.OptionParser()
    option_parser.add_option(
        '', '--csv1', type=str,
        help='The first CSV to parse.')
    option_parser.add_option(
        '', '--csv2', type=str,
        help='The second CSV to parse.')
    option_parser.add_option(
        '', '--output_csv', type=str,
        help='The file to write the output CSV to.')
    options, _ = option_parser.parse_args()

    # optparse has no notion of required options, so check them by hand.
    missing = [name for name in ('csv1', 'csv2', 'output_csv')
               if not getattr(options, name)]
    if missing:
        option_parser.error(
            'missing required option(s): %s'
            % ', '.join('--' + name for name in missing))

    sys.exit(parse_and_output(options.csv1, options.csv2, options.output_csv))
| 111 | |
| 112 | |
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()