Blame - tools/parse_llvm_coverage.py - platform/external/skia

blob: 5569fadac98754e0c30100b6ac3506f37f077791 [file] [log] [blame]

borenet	a6ae14e	2015-07-20 09:43:36 -0700	[diff] [blame]	1	#!/usr/bin/env python
				2	# Copyright (c) 2015 The Chromium Authors. All rights reserved.
				3	# Use of this source code is governed by a BSD-style license that can be
				4	# found in the LICENSE file.
				5
				6
				7	"""Parse an LLVM coverage report to generate useable results."""
				8
				9
				10	import argparse
				11	import json
				12	import os
				13	import re
				14	import subprocess
				15	import sys
				16
				17
				18	def _fix_filename(filename):
				19	"""Return a filename which we can use to identify the file.
				20
				21	The file paths printed by llvm-cov take the form:
				22
				23	/path/to/repo/out/dir/../../src/filename.cpp
				24
				25	And then they're truncated to 22 characters with leading ellipses:
				26
				27	...../../src/filename.cpp
				28
				29	This makes it really tough to determine whether the file actually belongs in
				30	the Skia repo. This function strips out the leading junk so that, if the file
				31	exists in the repo, the returned string matches the end of some relative path
				32	in the repo. This doesn't guarantee correctness, but it's about as close as
				33	we can get.
				34	"""
				35	return filename.split('..')[-1].lstrip('./')
				36
				37
				38	def _file_in_repo(filename, all_files):
				39	"""Return the name of the checked-in file matching the given filename.
				40
				41	Use suffix matching to determine which checked-in files the given filename
				42	matches. If there are no matches or multiple matches, return None.
				43	"""
				44	new_file = _fix_filename(filename)
				45	matched = []
				46	for f in all_files:
				47	if f.endswith(new_file):
				48	matched.append(f)
				49	if len(matched) == 1:
				50	return matched[0]
				51	elif len(matched) > 1:
				52	print >> sys.stderr, ('WARNING: multiple matches for %s; skipping:\n\t%s'
				53	% (new_file, '\n\t'.join(matched)))
				54	return None
				55
				56
				57	def _get_per_file_per_line_coverage(report):
				58	"""Return a dict whose keys are file names and values are coverage data.
				59
				60	Values are lists which take the form (lineno, coverage, code).
				61	"""
rmistry	5f80e8c	2016-04-18 04:18:56 -0700	[diff] [blame]	62	all_files = []
				63	for root, dirs, files in os.walk(os.getcwd()):
				64	if 'third_party/externals' in root:
				65	continue
				66	files = [f for f in files if not (f[0] == '.' or f.endswith('.pyc'))]
				67	dirs[:] = [d for d in dirs if not d[0] == '.']
				68	for name in files:
				69	all_files.append(os.path.join(root[(len(os.getcwd()) + 1):], name))
				70	all_files.sort()
				71
borenet	a6ae14e	2015-07-20 09:43:36 -0700	[diff] [blame]	72	lines = report.splitlines()
				73	current_file = None
				74	file_lines = []
				75	files = {}
				76	not_checked_in = '%' # Use this as the file name for not-checked-in files.
				77	for line in lines:
				78	m = re.match('([a-zA-Z0-9\./_-]+):', line)
				79	if m:
				80	if current_file and current_file != not_checked_in:
				81	files[current_file] = file_lines
				82	match_filename = _file_in_repo(m.groups()[0], all_files)
				83	current_file = match_filename or not_checked_in
				84	file_lines = []
				85	else:
				86	if current_file != not_checked_in:
				87	skip = re.match('^\s{2}-+$\|^\s{2}\\|.+$', line)
				88	if line and not skip:
				89	cov, linenum, code = line.split('\|', 2)
				90	cov = cov.strip()
				91	if cov:
				92	cov = int(cov)
				93	else:
				94	cov = None # We don't care about coverage for this line.
				95	linenum = int(linenum.strip())
				96	assert linenum == len(file_lines) + 1
				97	file_lines.append((linenum, cov, code.decode('utf-8', 'replace')))
				98	return files
				99
				100
				101
				102	def _testname(filename):
				103	"""Transform the file name into an ingestible test name."""
				104	return re.sub(r'[^a-zA-Z0-9]', '_', filename)
				105
				106
				107	def _nanobench_json(results, properties, key):
				108	"""Return the results in JSON format like that produced by nanobench."""
				109	rv = {}
				110	# Copy over the properties first, then set the 'key' and 'results' keys,
				111	# in order to avoid bad formatting in case the user passes in a properties
				112	# dict containing those keys.
				113	rv.update(properties)
				114	rv['key'] = key
				115	rv['results'] = {
				116	_testname(f): {
				117	'coverage': {
				118	'percent': percent,
borenet	4cb3003	2015-07-22 08:19:25 -0700	[diff] [blame]	119	'lines_not_covered': not_covered_lines,
borenet	a6ae14e	2015-07-20 09:43:36 -0700	[diff] [blame]	120	'options': {
				121	'fullname': f,
				122	'dir': os.path.dirname(f),
borenet	4cb3003	2015-07-22 08:19:25 -0700	[diff] [blame]	123	'source_type': 'coverage',
borenet	a6ae14e	2015-07-20 09:43:36 -0700	[diff] [blame]	124	},
				125	},
borenet	4cb3003	2015-07-22 08:19:25 -0700	[diff] [blame]	126	} for percent, not_covered_lines, f in results
borenet	a6ae14e	2015-07-20 09:43:36 -0700	[diff] [blame]	127	}
				128	return rv
				129
				130
				131	def _parse_key_value(kv_list):
				132	"""Return a dict whose key/value pairs are derived from the given list.
				133
				134	For example:
				135
				136	['k1', 'v1', 'k2', 'v2']
				137	becomes:
				138
				139	{'k1': 'v1',
				140	'k2': 'v2'}
				141	"""
				142	if len(kv_list) % 2 != 0:
				143	raise Exception('Invalid key/value pairs: %s' % kv_list)
				144
				145	rv = {}
				146	for i in xrange(len(kv_list) / 2):
				147	rv[kv_list[i2]] = kv_list[i2+1]
				148	return rv
				149
				150
				151	def _get_per_file_summaries(line_by_line):
				152	"""Summarize the full line-by-line coverage report by file."""
				153	per_file = []
				154	for filepath, lines in line_by_line.iteritems():
				155	total_lines = 0
				156	covered_lines = 0
				157	for _, cov, _ in lines:
				158	if cov is not None:
				159	total_lines += 1
				160	if cov > 0:
				161	covered_lines += 1
				162	if total_lines > 0:
				163	per_file.append((float(covered_lines)/float(total_lines)*100.0,
borenet	4cb3003	2015-07-22 08:19:25 -0700	[diff] [blame]	164	total_lines - covered_lines,
borenet	a6ae14e	2015-07-20 09:43:36 -0700	[diff] [blame]	165	filepath))
				166	return per_file
				167
				168
				169	def main():
				170	"""Generate useful data from a coverage report."""
				171	# Parse args.
				172	parser = argparse.ArgumentParser()
				173	parser.add_argument('--report', help='input file; an llvm coverage report.',
				174	required=True)
				175	parser.add_argument('--nanobench', help='output file for nanobench data.')
				176	parser.add_argument(
				177	'--key', metavar='key_or_value', nargs='+',
				178	help='key/value pairs identifying this bot.')
				179	parser.add_argument(
				180	'--properties', metavar='key_or_value', nargs='+',
				181	help='key/value pairs representing properties of this build.')
				182	parser.add_argument('--linebyline',
				183	help='output file for line-by-line JSON data.')
				184	args = parser.parse_args()
				185
				186	if args.nanobench and not (args.key and args.properties):
				187	raise Exception('--key and --properties are required with --nanobench')
				188
				189	with open(args.report) as f:
				190	report = f.read()
				191
				192	line_by_line = _get_per_file_per_line_coverage(report)
				193
				194	if args.linebyline:
				195	with open(args.linebyline, 'w') as f:
				196	json.dump(line_by_line, f)
				197
				198	if args.nanobench:
				199	# Parse the key and properties for use in the nanobench JSON output.
				200	key = _parse_key_value(args.key)
				201	properties = _parse_key_value(args.properties)
				202
				203	# Get per-file summaries.
				204	per_file = _get_per_file_summaries(line_by_line)
				205
				206	# Write results.
				207	format_results = _nanobench_json(per_file, properties, key)
				208	with open(args.nanobench, 'w') as f:
				209	json.dump(format_results, f)
				210
				211
				212	if __name__ == '__main__':
				213	main()