#!/usr/bin/env python
# Copyright (c) 2015 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
| 5 | |
| 6 | |
"""Parse an LLVM coverage report to generate usable results."""
| 8 | |
| 9 | |
| 10 | import argparse |
| 11 | import json |
| 12 | import os |
| 13 | import re |
| 14 | import subprocess |
| 15 | import sys |
| 16 | |
| 17 | |
| 18 | def _fix_filename(filename): |
| 19 | """Return a filename which we can use to identify the file. |
| 20 | |
| 21 | The file paths printed by llvm-cov take the form: |
| 22 | |
| 23 | /path/to/repo/out/dir/../../src/filename.cpp |
| 24 | |
| 25 | And then they're truncated to 22 characters with leading ellipses: |
| 26 | |
| 27 | ...../../src/filename.cpp |
| 28 | |
| 29 | This makes it really tough to determine whether the file actually belongs in |
| 30 | the Skia repo. This function strips out the leading junk so that, if the file |
| 31 | exists in the repo, the returned string matches the end of some relative path |
| 32 | in the repo. This doesn't guarantee correctness, but it's about as close as |
| 33 | we can get. |
| 34 | """ |
| 35 | return filename.split('..')[-1].lstrip('./') |
| 36 | |
| 37 | |
def _file_in_repo(filename, all_files):
    """Return the name of the checked-in file matching the given filename.

    The filename is first cleaned up with _fix_filename; the result is then
    compared against every entry of all_files using suffix matching.

    Args:
        filename: file path as it appears in the llvm-cov report.
        all_files: iterable of candidate file paths (strings).

    Returns:
        The single entry of all_files which ends with the cleaned-up
        filename, or None if there are no matches or multiple matches. In the
        multiple-match case a warning listing the candidates is written to
        stderr.
    """
    new_file = _fix_filename(filename)
    matched = [f for f in all_files if f.endswith(new_file)]
    if len(matched) == 1:
        return matched[0]
    if matched:
        # Write to stderr directly instead of using the Python 2-only
        # "print >>" statement, so the warning works on Python 2 and 3.
        sys.stderr.write(
            'WARNING: multiple matches for %s; skipping:\n\t%s\n'
            % (new_file, '\n\t'.join(matched)))
    return None
| 55 | |
| 56 | |
def _get_per_file_per_line_coverage(report):
    """Return a dict whose keys are file names and values are coverage data.

    The files below the current working directory are listed first (skipping
    hidden files and directories, '.pyc' files and anything whose directory
    path contains 'third_party/externals'). Each file header found in the
    report is then resolved against that list with _file_in_repo; sections
    whose header cannot be resolved to exactly one file are ignored.

    Args:
        report: text of an llvm coverage report. Each section starts with a
            '<path>:' header line followed by 'count|lineno|code' lines.

    Returns:
        A dict mapping the path (relative to the current working directory)
        of each resolved file to a list of (lineno, coverage, code) tuples.
        coverage is an int, or None when the report gives no count for that
        line.

    Raises:
        AssertionError: if the line numbers of a section are not consecutive
            starting from 1.
        ValueError: if a data line does not have the expected
            'count|lineno|code' shape.
    """
    cwd = os.getcwd()
    all_files = []
    for root, dirs, filenames in os.walk(cwd):
        if 'third_party/externals' in root:
            # Nothing below here is wanted; stop os.walk from descending.
            dirs[:] = []
            continue
        # Prune hidden directories in place so os.walk does not visit them.
        dirs[:] = [d for d in dirs if not d.startswith('.')]
        for name in filenames:
            if name.startswith('.') or name.endswith('.pyc'):
                continue
            all_files.append(os.path.relpath(os.path.join(root, name), cwd))
    all_files.sort()

    header_re = re.compile(r'([a-zA-Z0-9\./_-]+):')
    # Separator rows and nested annotation rows carry no per-line data.
    skip_re = re.compile(r'^\s{2}-+$|^\s{2}\|.+$')
    not_checked_in = '%'  # Use this as the file name for not-checked-in files.

    files = {}
    current_file = None
    file_lines = []
    for line in report.splitlines():
        m = header_re.match(line)
        if m:
            # A new section begins; store the one which just ended.
            if current_file and current_file != not_checked_in:
                files[current_file] = file_lines
            current_file = _file_in_repo(m.group(1), all_files) or not_checked_in
            file_lines = []
            continue

        # Lines before the first header, or inside a section we could not
        # attribute to a checked-in file, cannot be used.
        if current_file is None or current_file == not_checked_in:
            continue
        if not line or skip_re.match(line):
            continue

        cov, linenum, code = line.split('|', 2)
        cov = cov.strip()
        # An empty count means we don't care about coverage for this line.
        cov = int(cov) if cov else None
        linenum = int(linenum.strip())
        assert linenum == len(file_lines) + 1, (
            'unexpected line number %d in %s' % (linenum, current_file))
        # Python 2 reads the report as bytes; Python 3 already gives text.
        if isinstance(code, bytes):
            code = code.decode('utf-8', 'replace')
        file_lines.append((linenum, cov, code))

    # The final section has no following header to trigger the store above.
    if current_file and current_file != not_checked_in:
        files[current_file] = file_lines
    return files
| 99 | |
| 100 | |
| 101 | |
| 102 | def _testname(filename): |
| 103 | """Transform the file name into an ingestible test name.""" |
| 104 | return re.sub(r'[^a-zA-Z0-9]', '_', filename) |
| 105 | |
| 106 | |
def _nanobench_json(results, properties, key):
    """Assemble the results into a dict shaped like nanobench's JSON output.

    Args:
        results: iterable of (percent, not_covered_lines, filename) tuples.
        properties: dict of build properties, copied into the top level of
            the returned dict.
        key: value stored under the 'key' entry.

    Returns:
        A dict holding the properties plus 'key' and 'results' entries.
        'results' maps the test name derived from each file name to that
        file's coverage data.
    """
    per_test = {}
    for percent, not_covered_lines, fullname in results:
        options = {
            'fullname': fullname,
            'dir': os.path.dirname(fullname),
            'source_type': 'coverage',
        }
        per_test[_testname(fullname)] = {
            'coverage': {
                'percent': percent,
                'lines_not_covered': not_covered_lines,
                'options': options,
            },
        }

    # Start from the properties and assign 'key' and 'results' afterwards, so
    # that a properties dict containing those names cannot clobber them.
    output = dict(properties)
    output['key'] = key
    output['results'] = per_test
    return output
| 129 | |
| 130 | |
| 131 | def _parse_key_value(kv_list): |
| 132 | """Return a dict whose key/value pairs are derived from the given list. |
| 133 | |
| 134 | For example: |
| 135 | |
| 136 | ['k1', 'v1', 'k2', 'v2'] |
| 137 | becomes: |
| 138 | |
| 139 | {'k1': 'v1', |
| 140 | 'k2': 'v2'} |
| 141 | """ |
| 142 | if len(kv_list) % 2 != 0: |
| 143 | raise Exception('Invalid key/value pairs: %s' % kv_list) |
| 144 | |
| 145 | rv = {} |
| 146 | for i in xrange(len(kv_list) / 2): |
| 147 | rv[kv_list[i*2]] = kv_list[i*2+1] |
| 148 | return rv |
| 149 | |
| 150 | |
| 151 | def _get_per_file_summaries(line_by_line): |
| 152 | """Summarize the full line-by-line coverage report by file.""" |
| 153 | per_file = [] |
| 154 | for filepath, lines in line_by_line.iteritems(): |
| 155 | total_lines = 0 |
| 156 | covered_lines = 0 |
| 157 | for _, cov, _ in lines: |
| 158 | if cov is not None: |
| 159 | total_lines += 1 |
| 160 | if cov > 0: |
| 161 | covered_lines += 1 |
| 162 | if total_lines > 0: |
| 163 | per_file.append((float(covered_lines)/float(total_lines)*100.0, |
borenet | 4cb3003 | 2015-07-22 08:19:25 -0700 | [diff] [blame] | 164 | total_lines - covered_lines, |
borenet | a6ae14e | 2015-07-20 09:43:36 -0700 | [diff] [blame] | 165 | filepath)) |
| 166 | return per_file |
| 167 | |
| 168 | |
def main():
    """Turn an llvm coverage report into JSON output files.

    Reads the report named by --report and parses it into line-by-line
    coverage data. With --linebyline that data is written out as JSON. With
    --nanobench, per-file summaries are written as nanobench-style JSON,
    tagged with the --key and --properties key/value pairs.

    Raises:
        Exception: if --nanobench is given without both --key and
            --properties.
    """
    # Command line.
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        '--report', required=True,
        help='input file; an llvm coverage report.')
    arg_parser.add_argument(
        '--nanobench', help='output file for nanobench data.')
    arg_parser.add_argument(
        '--key', metavar='key_or_value', nargs='+',
        help='key/value pairs identifying this bot.')
    arg_parser.add_argument(
        '--properties', metavar='key_or_value', nargs='+',
        help='key/value pairs representing properties of this build.')
    arg_parser.add_argument(
        '--linebyline', help='output file for line-by-line JSON data.')
    options = arg_parser.parse_args()

    if options.nanobench:
        if not options.key or not options.properties:
            raise Exception(
                '--key and --properties are required with --nanobench')

    # Input.
    with open(options.report) as report_file:
        report_text = report_file.read()
    coverage_by_file = _get_per_file_per_line_coverage(report_text)

    # Optional line-by-line output.
    if options.linebyline:
        with open(options.linebyline, 'w') as linebyline_file:
            json.dump(coverage_by_file, linebyline_file)

    if not options.nanobench:
        return

    # Optional nanobench output: per-file summaries tagged with the bot key
    # and build properties. Everything is computed before the output file is
    # opened.
    bot_key = _parse_key_value(options.key)
    build_properties = _parse_key_value(options.properties)
    summaries = _get_per_file_summaries(coverage_by_file)
    nanobench_data = _nanobench_json(summaries, build_properties, bot_key)
    with open(options.nanobench, 'w') as nanobench_file:
        json.dump(nanobench_data, nanobench_file)
| 210 | |
| 211 | |
# Run main() only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()