Blame - build/android/emma_coverage_stats.py - fp2-dev/platform/external/v8

blob: 20ec8eae46877fbefe5b6f6f74eebcf28a7f1608 [file] [log] [blame]

Ben Murdoch	097c5b2	2016-05-18 11:27:45 +0100	[diff] [blame]	1	#!/usr/bin/python
				2	# Copyright 2015 The Chromium Authors. All rights reserved.
				3	# Use of this source code is governed by a BSD-style license that can be
				4	# found in the LICENSE file.
				5
				6	"""Generates incremental code coverage reports for Java code in Chromium.
				7
				8	Usage:
				9
				10	build/android/emma_coverage_stats.py -v --out <output file path> --emma-dir
				11	<EMMA file directory> --lines-for-coverage-file
				12	<path to file containing lines for coverage>
				13
				14	Creates a JSON representation of the overall and file coverage stats and saves
				15	this information to the specified output file.
				16	"""
				17
				18	import argparse
				19	import collections
				20	import json
				21	import logging
				22	import os
				23	import re
				24	import sys
				25	from xml.etree import ElementTree
				26
				27	import devil_chromium
				28	from devil.utils import run_tests_helper
				29
				30	NOT_EXECUTABLE = -1
				31	NOT_COVERED = 0
				32	COVERED = 1
				33	PARTIALLY_COVERED = 2
				34
				35	# Coverage information about a single line of code.
				36	LineCoverage = collections.namedtuple(
				37	'LineCoverage',
				38	['lineno', 'source', 'covered_status', 'fractional_line_coverage'])
				39
				40
				41	class _EmmaHtmlParser(object):
				42	"""Encapsulates HTML file parsing operations.
				43
				44	This class contains all operations related to parsing HTML files that were
				45	produced using the EMMA code coverage tool.
				46
				47	Example HTML:
				48
				49	Package links:
				50	<a href="_files/1.html">org.chromium.chrome</a>
				51	This is returned by the selector \|XPATH_SELECT_PACKAGE_ELEMENTS\|.
				52
				53	Class links:
				54	<a href="1e.html">DoActivity.java</a>
				55	This is returned by the selector \|XPATH_SELECT_CLASS_ELEMENTS\|.
				56
				57	Line coverage data:
				58	<tr class="p">
				59	<td class="l" title="78% line coverage (7 out of 9)">108</td>
				60	<td title="78% line coverage (7 out of 9 instructions)">
				61	if (index < 0 \|\| index = mSelectors.size()) index = 0;</td>
				62	</tr>
				63	<tr>
				64	<td class="l">109</td>
				65	<td> </td>
				66	</tr>
				67	<tr class="c">
				68	<td class="l">110</td>
				69	<td> if (mSelectors.get(index) != null) {</td>
				70	</tr>
				71	<tr class="z">
				72	<td class="l">111</td>
				73	<td> for (int i = 0; i < mSelectors.size(); i++) {</td>
				74	</tr>
				75	Each <tr> element is returned by the selector \|XPATH_SELECT_LOC\|.
				76
				77	We can parse this to get:
				78	1. Line number
				79	2. Line of source code
				80	3. Coverage status (c, z, or p)
				81	4. Fractional coverage value (% out of 100 if PARTIALLY_COVERED)
				82	"""
				83	# Selector to match all <a> elements within the rows that are in the table
				84	# that displays all of the different packages.
				85	_XPATH_SELECT_PACKAGE_ELEMENTS = './/BODY/TABLE[4]/TR/TD/A'
				86
				87	# Selector to match all <a> elements within the rows that are in the table
				88	# that displays all of the different classes within a package.
				89	_XPATH_SELECT_CLASS_ELEMENTS = './/BODY/TABLE[3]/TR/TD/A'
				90
				91	# Selector to match all <tr> elements within the table containing Java source
				92	# code in an EMMA HTML file.
				93	_XPATH_SELECT_LOC = './/BODY/TABLE[4]/TR'
				94
				95	# Children of HTML elements are represented as a list in ElementTree. These
				96	# constants represent list indices corresponding to relevant child elements.
				97
				98	# Child 1 contains percentage covered for a line.
				99	_ELEMENT_PERCENT_COVERED = 1
				100
				101	# Child 1 contains the original line of source code.
				102	_ELEMENT_CONTAINING_SOURCE_CODE = 1
				103
				104	# Child 0 contains the line number.
				105	_ELEMENT_CONTAINING_LINENO = 0
				106
				107	# Maps CSS class names to corresponding coverage constants.
				108	_CSS_TO_STATUS = {'c': COVERED, 'p': PARTIALLY_COVERED, 'z': NOT_COVERED}
				109
				110	# UTF-8 no break space.
				111	_NO_BREAK_SPACE = '\xc2\xa0'
				112
				113	def __init__(self, emma_file_base_dir):
				114	"""Initializes _EmmaHtmlParser.
				115
				116	Args:
				117	emma_file_base_dir: Path to the location where EMMA report files are
				118	stored. Should be where index.html is stored.
				119	"""
				120	self._base_dir = emma_file_base_dir
				121	self._emma_files_path = os.path.join(self._base_dir, '_files')
				122	self._index_path = os.path.join(self._base_dir, 'index.html')
				123
				124	def GetLineCoverage(self, emma_file_path):
				125	"""Returns a list of LineCoverage objects for the given EMMA HTML file.
				126
				127	Args:
				128	emma_file_path: String representing the path to the EMMA HTML file.
				129
				130	Returns:
				131	A list of LineCoverage objects.
				132	"""
				133	line_tr_elements = self._FindElements(
				134	emma_file_path, self._XPATH_SELECT_LOC)
				135	line_coverage = []
				136	for tr in line_tr_elements:
				137	# Get the coverage status.
				138	coverage_status = self._CSS_TO_STATUS.get(tr.get('CLASS'), NOT_EXECUTABLE)
				139	# Get the fractional coverage value.
				140	if coverage_status == PARTIALLY_COVERED:
				141	title_attribute = (tr[self._ELEMENT_PERCENT_COVERED].get('TITLE'))
				142	# Parse string that contains percent covered: "83% line coverage ...".
				143	percent_covered = title_attribute.split('%')[0]
				144	fractional_coverage = int(percent_covered) / 100.0
				145	else:
				146	fractional_coverage = 1.0
				147
				148	# Get the line number.
				149	lineno_element = tr[self._ELEMENT_CONTAINING_LINENO]
				150	# Handles oddly formatted HTML (where there is an extra <a> tag).
				151	lineno = int(lineno_element.text or
				152	lineno_element[self._ELEMENT_CONTAINING_LINENO].text)
				153	# Get the original line of Java source code.
				154	raw_source = tr[self._ELEMENT_CONTAINING_SOURCE_CODE].text
				155	utf8_source = raw_source.encode('UTF-8')
				156	source = utf8_source.replace(self._NO_BREAK_SPACE, ' ')
				157
				158	line = LineCoverage(lineno, source, coverage_status, fractional_coverage)
				159	line_coverage.append(line)
				160
				161	return line_coverage
				162
				163	def GetPackageNameToEmmaFileDict(self):
				164	"""Returns a dict mapping Java packages to EMMA HTML coverage files.
				165
				166	Parses the EMMA index.html file to get a list of packages, then parses each
				167	package HTML file to get a list of classes for that package, and creates
				168	a dict with this info.
				169
				170	Returns:
				171	A dict mapping string representation of Java packages (with class
				172	names appended) to the corresponding file paths of EMMA HTML files.
				173	"""
				174	# These <a> elements contain each package name and the path of the file
				175	# where all classes within said package are listed.
				176	package_link_elements = self._FindElements(
				177	self._index_path, self._XPATH_SELECT_PACKAGE_ELEMENTS)
				178	# Maps file path of package directory (EMMA generated) to package name.
				179	# Example: emma_dir/f.html: org.chromium.chrome.
				180	package_links = {
				181	os.path.join(self._base_dir, link.attrib['HREF']): link.text
				182	for link in package_link_elements if 'HREF' in link.attrib
				183	}
				184
				185	package_to_emma = {}
				186	for package_emma_file_path, package_name in package_links.iteritems():
				187	# These <a> elements contain each class name in the current package and
				188	# the path of the file where the coverage info is stored for each class.
				189	coverage_file_link_elements = self._FindElements(
				190	package_emma_file_path, self._XPATH_SELECT_CLASS_ELEMENTS)
				191
				192	for class_name_element in coverage_file_link_elements:
				193	emma_coverage_file_path = os.path.join(
				194	self._emma_files_path, class_name_element.attrib['HREF'])
				195	full_package_name = '%s.%s' % (package_name, class_name_element.text)
				196	package_to_emma[full_package_name] = emma_coverage_file_path
				197
				198	return package_to_emma
				199
				200	# pylint: disable=no-self-use
				201	def _FindElements(self, file_path, xpath_selector):
				202	"""Reads a HTML file and performs an XPath match.
				203
				204	Args:
				205	file_path: String representing the path to the HTML file.
				206	xpath_selector: String representing xpath search pattern.
				207
				208	Returns:
				209	A list of ElementTree.Elements matching the given XPath selector.
				210	Returns an empty list if there is no match.
				211	"""
				212	with open(file_path) as f:
				213	file_contents = f.read().decode('ISO-8859-1').encode('UTF-8')
				214	root = ElementTree.fromstring(file_contents)
				215	return root.findall(xpath_selector)
				216
				217
				218	class _EmmaCoverageStats(object):
				219	"""Computes code coverage stats for Java code using the coverage tool EMMA.
				220
				221	This class provides an API that allows users to capture absolute code coverage
				222	and code coverage on a subset of lines for each Java source file. Coverage
				223	reports are generated in JSON format.
				224	"""
				225	# Regular expression to get package name from Java package statement.
				226	RE_PACKAGE_MATCH_GROUP = 'package'
				227	RE_PACKAGE = re.compile(r'package (?P<%s>[\w.]*);' % RE_PACKAGE_MATCH_GROUP)
				228
				229	def __init__(self, emma_file_base_dir, files_for_coverage):
				230	"""Initialize _EmmaCoverageStats.
				231
				232	Args:
				233	emma_file_base_dir: String representing the path to the base directory
				234	where EMMA HTML coverage files are stored, i.e. parent of index.html.
				235	files_for_coverage: A list of Java source code file paths to get EMMA
				236	coverage for.
				237	"""
				238	self._emma_parser = _EmmaHtmlParser(emma_file_base_dir)
				239	self._source_to_emma = self._GetSourceFileToEmmaFileDict(files_for_coverage)
				240
				241	def GetCoverageDict(self, lines_for_coverage):
				242	"""Returns a dict containing detailed coverage information.
				243
				244	Gets detailed coverage stats for each file specified in the
				245	\|lines_for_coverage\| dict and the total incremental number of lines covered
				246	and executable for all files in \|lines_for_coverage\|.
				247
				248	Args:
				249	lines_for_coverage: A dict mapping Java source file paths to lists of line
				250	numbers.
				251
				252	Returns:
				253	A dict containing coverage stats for the given dict of files and lines.
				254	Contains absolute coverage stats for each file, coverage stats for each
				255	file's lines specified in \|lines_for_coverage\|, line by line coverage
				256	for each file, and overall coverage stats for the lines specified in
				257	\|lines_for_coverage\|.
				258	"""
				259	file_coverage = {}
				260	for file_path, line_numbers in lines_for_coverage.iteritems():
				261	file_coverage_dict = self.GetCoverageDictForFile(file_path, line_numbers)
				262	if file_coverage_dict:
				263	file_coverage[file_path] = file_coverage_dict
				264	else:
				265	logging.warning(
				266	'No code coverage data for %s, skipping.', file_path)
				267
				268	covered_statuses = [s['incremental'] for s in file_coverage.itervalues()]
				269	num_covered_lines = sum(s['covered'] for s in covered_statuses)
				270	num_total_lines = sum(s['total'] for s in covered_statuses)
				271	return {
				272	'files': file_coverage,
				273	'patch': {
				274	'incremental': {
				275	'covered': num_covered_lines,
				276	'total': num_total_lines
				277	}
				278	}
				279	}
				280
				281	def GetCoverageDictForFile(self, file_path, line_numbers):
				282	"""Returns a dict containing detailed coverage info for the given file.
				283
				284	Args:
				285	file_path: The path to the Java source file that we want to create the
				286	coverage dict for.
				287	line_numbers: A list of integer line numbers to retrieve additional stats
				288	for.
				289
				290	Returns:
				291	A dict containing absolute, incremental, and line by line coverage for
				292	a file.
				293	"""
				294	if file_path not in self._source_to_emma:
				295	return None
				296	emma_file = self._source_to_emma[file_path]
				297	total_line_coverage = self._emma_parser.GetLineCoverage(emma_file)
				298	incremental_line_coverage = [line for line in total_line_coverage
				299	if line.lineno in line_numbers]
				300	line_by_line_coverage = [
				301	{
				302	'line': line.source,
				303	'coverage': line.covered_status,
				304	'changed': line.lineno in line_numbers,
				305	'fractional_coverage': line.fractional_line_coverage,
				306	}
				307	for line in total_line_coverage
				308	]
				309	total_covered_lines, total_lines = (
				310	self.GetSummaryStatsForLines(total_line_coverage))
				311	incremental_covered_lines, incremental_total_lines = (
				312	self.GetSummaryStatsForLines(incremental_line_coverage))
				313
				314	file_coverage_stats = {
				315	'absolute': {
				316	'covered': total_covered_lines,
				317	'total': total_lines
				318	},
				319	'incremental': {
				320	'covered': incremental_covered_lines,
				321	'total': incremental_total_lines
				322	},
				323	'source': line_by_line_coverage,
				324	}
				325	return file_coverage_stats
				326
				327	# pylint: disable=no-self-use
				328	def GetSummaryStatsForLines(self, line_coverage):
				329	"""Gets summary stats for a given list of LineCoverage objects.
				330
				331	Args:
				332	line_coverage: A list of LineCoverage objects.
				333
				334	Returns:
				335	A tuple containing the number of lines that are covered and the total
				336	number of lines that are executable, respectively
				337	"""
				338	partially_covered_sum = 0
				339	covered_status_totals = {COVERED: 0, NOT_COVERED: 0, PARTIALLY_COVERED: 0}
				340	for line in line_coverage:
				341	status = line.covered_status
				342	if status == NOT_EXECUTABLE:
				343	continue
				344	covered_status_totals[status] += 1
				345	if status == PARTIALLY_COVERED:
				346	partially_covered_sum += line.fractional_line_coverage
				347
				348	total_covered = covered_status_totals[COVERED] + partially_covered_sum
				349	total_lines = sum(covered_status_totals.values())
				350	return total_covered, total_lines
				351
				352	def _GetSourceFileToEmmaFileDict(self, files):
				353	"""Gets a dict used to correlate Java source files with EMMA HTML files.
				354
				355	This method gathers the information needed to correlate EMMA HTML
				356	files with Java source files. EMMA XML and plain text reports do not provide
				357	line by line coverage data, so HTML reports must be used instead.
				358	Unfortunately, the HTML files that are created are given garbage names
				359	(i.e 1.html) so we need to manually correlate EMMA HTML files
				360	with the original Java source files.
				361
				362	Args:
				363	files: A list of file names for which coverage information is desired.
				364
				365	Returns:
				366	A dict mapping Java source file paths to EMMA HTML file paths.
				367	"""
				368	# Maps Java source file paths to package names.
				369	# Example: /usr/code/file.java -> org.chromium.file.java.
				370	source_to_package = {}
				371	for file_path in files:
				372	package = self.GetPackageNameFromFile(file_path)
				373	if package:
				374	source_to_package[file_path] = package
				375	else:
				376	logging.warning("Skipping %s because it doesn\'t have a package "
				377	"statement.", file_path)
				378
				379	# Maps package names to EMMA report HTML files.
				380	# Example: org.chromium.file.java -> out/coverage/1a.html.
				381	package_to_emma = self._emma_parser.GetPackageNameToEmmaFileDict()
				382	# Finally, we have a dict mapping Java file paths to EMMA report files.
				383	# Example: /usr/code/file.java -> out/coverage/1a.html.
				384	source_to_emma = {source: package_to_emma[package]
				385	for source, package in source_to_package.iteritems()
				386	if package in package_to_emma}
				387	return source_to_emma
				388
				389	@staticmethod
				390	def NeedsCoverage(file_path):
				391	"""Checks to see if the file needs to be analyzed for code coverage.
				392
				393	Args:
				394	file_path: A string representing path to the file.
				395
				396	Returns:
				397	True for Java files that exist, False for all others.
				398	"""
				399	if os.path.splitext(file_path)[1] == '.java' and os.path.exists(file_path):
				400	return True
				401	else:
				402	logging.info('Skipping file %s, cannot compute code coverage.', file_path)
				403	return False
				404
				405	@staticmethod
				406	def GetPackageNameFromFile(file_path):
				407	"""Gets the full package name including the file name for a given file path.
				408
				409	Args:
				410	file_path: String representing the path to the Java source file.
				411
				412	Returns:
				413	A string representing the full package name with file name appended or
				414	None if there is no package statement in the file.
				415	"""
				416	with open(file_path) as f:
				417	file_content = f.read()
				418	package_match = re.search(_EmmaCoverageStats.RE_PACKAGE, file_content)
				419	if package_match:
				420	package = package_match.group(_EmmaCoverageStats.RE_PACKAGE_MATCH_GROUP)
				421	file_name = os.path.basename(file_path)
				422	return '%s.%s' % (package, file_name)
				423	else:
				424	return None
				425
				426
				427	def GenerateCoverageReport(line_coverage_file, out_file_path, coverage_dir):
				428	"""Generates a coverage report for a given set of lines.
				429
				430	Writes the results of the coverage analysis to the file specified by
				431	\|out_file_path\|.
				432
				433	Args:
				434	line_coverage_file: The path to a file which contains a dict mapping file
				435	names to lists of line numbers. Example: {file1: [1, 2, 3], ...} means
				436	that we should compute coverage information on lines 1 - 3 for file1.
				437	out_file_path: A string representing the location to write the JSON report.
				438	coverage_dir: A string representing the file path where the EMMA
				439	HTML coverage files are located (i.e. folder where index.html is located).
				440	"""
				441	with open(line_coverage_file) as f:
				442	potential_files_for_coverage = json.load(f)
				443
				444	files_for_coverage = {f: lines
				445	for f, lines in potential_files_for_coverage.iteritems()
				446	if _EmmaCoverageStats.NeedsCoverage(f)}
				447
				448	coverage_results = {}
				449	if files_for_coverage:
				450	code_coverage = _EmmaCoverageStats(coverage_dir, files_for_coverage.keys())
				451	coverage_results = code_coverage.GetCoverageDict(files_for_coverage)
				452	else:
				453	logging.info('No Java files requiring coverage were included in %s.',
				454	line_coverage_file)
				455
				456	with open(out_file_path, 'w+') as out_status_file:
				457	json.dump(coverage_results, out_status_file)
				458
				459
				460	def main():
				461	argparser = argparse.ArgumentParser()
				462	argparser.add_argument('--out', required=True, type=str,
				463	help='Report output file path.')
				464	argparser.add_argument('--emma-dir', required=True, type=str,
				465	help='EMMA HTML report directory.')
				466	argparser.add_argument('--lines-for-coverage-file', required=True, type=str,
				467	help='File containing a JSON object. Should contain a '
				468	'dict mapping file names to lists of line numbers of '
				469	'code for which coverage information is desired.')
				470	argparser.add_argument('-v', '--verbose', action='count',
				471	help='Print verbose log information.')
				472	args = argparser.parse_args()
				473	run_tests_helper.SetLogLevel(args.verbose)
				474	devil_chromium.Initialize()
				475	GenerateCoverageReport(args.lines_for_coverage_file, args.out, args.emma_dir)
				476
				477
# Run only when executed as a script. main() has no return statement, so
# sys.exit() receives None and the process exits with status 0.
if __name__ == '__main__':
  sys.exit(main())