blob: 20ec8eae46877fbefe5b6f6f74eebcf28a7f1608 [file] [log] [blame]
Ben Murdoch097c5b22016-05-18 11:27:45 +01001#!/usr/bin/python
2# Copyright 2015 The Chromium Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6"""Generates incremental code coverage reports for Java code in Chromium.
7
8Usage:
9
10 build/android/emma_coverage_stats.py -v --out <output file path> --emma-dir
11 <EMMA file directory> --lines-for-coverage-file
12 <path to file containing lines for coverage>
13
14 Creates a JSON representation of the overall and file coverage stats and saves
15 this information to the specified output file.
16"""
17
18import argparse
19import collections
20import json
21import logging
22import os
23import re
24import sys
25from xml.etree import ElementTree
26
27import devil_chromium
28from devil.utils import run_tests_helper
29
# Coverage status codes attached to every parsed line of Java source.
NOT_EXECUTABLE = -1     # Row has none of the EMMA coverage CSS classes.
NOT_COVERED = 0         # EMMA CSS class 'z'.
COVERED = 1             # EMMA CSS class 'c'.
PARTIALLY_COVERED = 2   # EMMA CSS class 'p'.

# Immutable record describing the coverage of one line of source code:
#   lineno: integer line number within the source file.
#   source: text of the source line.
#   covered_status: one of the status codes defined above.
#   fractional_line_coverage: float in [0, 1]; only meaningful when the status
#     is PARTIALLY_COVERED.
LineCoverage = collections.namedtuple(
    'LineCoverage',
    'lineno source covered_status fractional_line_coverage')
39
40
class _EmmaHtmlParser(object):
  """Encapsulates HTML file parsing operations.

  This class contains all operations related to parsing HTML files that were
  produced using the EMMA code coverage tool.

  Example HTML:

  Package links:
    <a href="_files/1.html">org.chromium.chrome</a>
    This is returned by the selector |_XPATH_SELECT_PACKAGE_ELEMENTS|.

  Class links:
    <a href="1e.html">DoActivity.java</a>
    This is returned by the selector |_XPATH_SELECT_CLASS_ELEMENTS|.

  Line coverage data:
    <tr class="p">
      <td class="l" title="78% line coverage (7 out of 9)">108</td>
      <td title="78% line coverage (7 out of 9 instructions)">
        if (index < 0 || index >= mSelectors.size()) index = 0;</td>
    </tr>
    <tr>
      <td class="l">109</td>
      <td> </td>
    </tr>
    <tr class="c">
      <td class="l">110</td>
      <td>        if (mSelectors.get(index) != null) {</td>
    </tr>
    <tr class="z">
      <td class="l">111</td>
      <td>          for (int i = 0; i < mSelectors.size(); i++) {</td>
    </tr>
    Each <tr> element is returned by the selector |_XPATH_SELECT_LOC|.

    We can parse this to get:
      1. Line number
      2. Line of source code
      3. Coverage status (c, z, or p)
      4. Fractional coverage value (% out of 100 if PARTIALLY_COVERED)
  """
  # Selector to match all <a> elements within the rows that are in the table
  # that displays all of the different packages.
  _XPATH_SELECT_PACKAGE_ELEMENTS = './/BODY/TABLE[4]/TR/TD/A'

  # Selector to match all <a> elements within the rows that are in the table
  # that displays all of the different classes within a package.
  _XPATH_SELECT_CLASS_ELEMENTS = './/BODY/TABLE[3]/TR/TD/A'

  # Selector to match all <tr> elements within the table containing Java source
  # code in an EMMA HTML file.
  _XPATH_SELECT_LOC = './/BODY/TABLE[4]/TR'

  # Children of HTML elements are represented as a list in ElementTree. These
  # constants represent list indices corresponding to relevant child elements.

  # Child 1 contains percentage covered for a line.
  _ELEMENT_PERCENT_COVERED = 1

  # Child 1 contains the original line of source code.
  _ELEMENT_CONTAINING_SOURCE_CODE = 1

  # Child 0 contains the line number.
  _ELEMENT_CONTAINING_LINENO = 0

  # Maps CSS class names to corresponding coverage constants.
  _CSS_TO_STATUS = {'c': COVERED, 'p': PARTIALLY_COVERED, 'z': NOT_COVERED}

  # No-break space as a text (unicode) character. The replacement is done on
  # decoded text so that it behaves the same on Python 2 and Python 3.
  _NO_BREAK_SPACE = u'\xa0'

  def __init__(self, emma_file_base_dir):
    """Initializes _EmmaHtmlParser.

    Args:
      emma_file_base_dir: Path to the location where EMMA report files are
        stored. Should be where index.html is stored.
    """
    self._base_dir = emma_file_base_dir
    self._emma_files_path = os.path.join(self._base_dir, '_files')
    self._index_path = os.path.join(self._base_dir, 'index.html')

  def GetLineCoverage(self, emma_file_path):
    """Returns a list of LineCoverage objects for the given EMMA HTML file.

    Args:
      emma_file_path: String representing the path to the EMMA HTML file.

    Returns:
      A list of LineCoverage objects, one per <tr> matched by
      |_XPATH_SELECT_LOC|.
    """
    line_coverage = []
    for tr in self._FindElements(emma_file_path, self._XPATH_SELECT_LOC):
      # Rows without one of the known CSS classes are not executable.
      coverage_status = self._CSS_TO_STATUS.get(tr.get('CLASS'), NOT_EXECUTABLE)

      if coverage_status == PARTIALLY_COVERED:
        title_attribute = tr[self._ELEMENT_PERCENT_COVERED].get('TITLE')
        # Parse string that contains percent covered: "83% line coverage ...".
        percent_covered = title_attribute.split('%')[0]
        fractional_coverage = int(percent_covered) / 100.0
      else:
        fractional_coverage = 1.0

      lineno_element = tr[self._ELEMENT_CONTAINING_LINENO]
      # Handles oddly formatted HTML (where there is an extra <a> tag): fall
      # back to the text of the first child when the cell has no direct text.
      lineno = int(lineno_element.text or
                   lineno_element[self._ELEMENT_CONTAINING_LINENO].text)

      # An empty source cell has text None; treat it as an empty line instead
      # of failing on the attribute access.
      raw_source = tr[self._ELEMENT_CONTAINING_SOURCE_CODE].text or u''
      source = raw_source.replace(self._NO_BREAK_SPACE, u' ')
      if not isinstance(source, str):
        # Python 2 only: keep handing out UTF-8 encoded byte strings, as
        # before. On Python 3 the text is already a str and is left alone.
        source = source.encode('UTF-8')

      line_coverage.append(
          LineCoverage(lineno, source, coverage_status, fractional_coverage))

    return line_coverage

  def GetPackageNameToEmmaFileDict(self):
    """Returns a dict mapping Java packages to EMMA HTML coverage files.

    Parses the EMMA index.html file to get a list of packages, then parses each
    package HTML file to get a list of classes for that package, and creates
    a dict with this info.

    Returns:
      A dict mapping string representation of Java packages (with class
      names appended) to the corresponding file paths of EMMA HTML files.
    """
    # These <a> elements contain each package name and the path of the file
    # where all classes within said package are listed.
    package_link_elements = self._FindElements(
        self._index_path, self._XPATH_SELECT_PACKAGE_ELEMENTS)
    # Maps file path of package directory (EMMA generated) to package name.
    # Example: emma_dir/f.html: org.chromium.chrome.
    package_links = {
        os.path.join(self._base_dir, link.attrib['HREF']): link.text
        for link in package_link_elements if 'HREF' in link.attrib
    }

    package_to_emma = {}
    # items() (rather than the Python 2 only iteritems()) works on both
    # Python 2 and Python 3.
    for package_emma_file_path, package_name in package_links.items():
      # These <a> elements contain each class name in the current package and
      # the path of the file where the coverage info is stored for each class.
      coverage_file_link_elements = self._FindElements(
          package_emma_file_path, self._XPATH_SELECT_CLASS_ELEMENTS)

      for class_name_element in coverage_file_link_elements:
        # Anchors without a link target cannot be mapped to a coverage file;
        # skip them, mirroring the filter applied to package links above.
        if 'HREF' not in class_name_element.attrib:
          continue
        emma_coverage_file_path = os.path.join(
            self._emma_files_path, class_name_element.attrib['HREF'])
        full_package_name = '%s.%s' % (package_name, class_name_element.text)
        package_to_emma[full_package_name] = emma_coverage_file_path

    return package_to_emma

  # pylint: disable=no-self-use
  def _FindElements(self, file_path, xpath_selector):
    """Reads a HTML file and performs an XPath match.

    Args:
      file_path: String representing the path to the HTML file.
      xpath_selector: String representing xpath search pattern.

    Returns:
      A list of ElementTree.Elements matching the given XPath selector.
      Returns an empty list if there is no match.
    """
    # Read raw bytes so the ISO-8859-1 -> UTF-8 transcoding below works on
    # both Python 2 and Python 3 (text-mode reads return str on Python 3,
    # which has no decode()).
    with open(file_path, 'rb') as f:
      file_contents = f.read().decode('ISO-8859-1').encode('UTF-8')
    root = ElementTree.fromstring(file_contents)
    return root.findall(xpath_selector)
216
217
class _EmmaCoverageStats(object):
  """Computes code coverage stats for Java code using the coverage tool EMMA.

  This class provides an API that allows users to capture absolute code coverage
  and code coverage on a subset of lines for each Java source file. Coverage
  reports are generated in JSON format.
  """
  # Regular expression to get package name from Java package statement.
  RE_PACKAGE_MATCH_GROUP = 'package'
  RE_PACKAGE = re.compile(r'package (?P<%s>[\w.]*);' % RE_PACKAGE_MATCH_GROUP)

  def __init__(self, emma_file_base_dir, files_for_coverage):
    """Initialize _EmmaCoverageStats.

    Args:
      emma_file_base_dir: String representing the path to the base directory
        where EMMA HTML coverage files are stored, i.e. parent of index.html.
      files_for_coverage: A list of Java source code file paths to get EMMA
        coverage for.
    """
    self._emma_parser = _EmmaHtmlParser(emma_file_base_dir)
    self._source_to_emma = self._GetSourceFileToEmmaFileDict(files_for_coverage)

  def GetCoverageDict(self, lines_for_coverage):
    """Returns a dict containing detailed coverage information.

    Gets detailed coverage stats for each file specified in the
    |lines_for_coverage| dict and the total incremental number of lines covered
    and executable for all files in |lines_for_coverage|.

    Args:
      lines_for_coverage: A dict mapping Java source file paths to lists of line
        numbers.

    Returns:
      A dict containing coverage stats for the given dict of files and lines.
      Contains absolute coverage stats for each file, coverage stats for each
      file's lines specified in |lines_for_coverage|, line by line coverage
      for each file, and overall coverage stats for the lines specified in
      |lines_for_coverage|. Files without coverage data are logged and left
      out of the result.
    """
    file_coverage = {}
    # items()/values() are used instead of the Python 2 only
    # iteritems()/itervalues() so this runs on Python 2 and Python 3.
    for file_path, line_numbers in lines_for_coverage.items():
      file_coverage_dict = self.GetCoverageDictForFile(file_path, line_numbers)
      if file_coverage_dict:
        file_coverage[file_path] = file_coverage_dict
      else:
        logging.warning(
            'No code coverage data for %s, skipping.', file_path)

    covered_statuses = [s['incremental'] for s in file_coverage.values()]
    num_covered_lines = sum(s['covered'] for s in covered_statuses)
    num_total_lines = sum(s['total'] for s in covered_statuses)
    return {
        'files': file_coverage,
        'patch': {
            'incremental': {
                'covered': num_covered_lines,
                'total': num_total_lines
            }
        }
    }

  def GetCoverageDictForFile(self, file_path, line_numbers):
    """Returns a dict containing detailed coverage info for the given file.

    Args:
      file_path: The path to the Java source file that we want to create the
        coverage dict for.
      line_numbers: A list of integer line numbers to retrieve additional stats
        for.

    Returns:
      A dict containing absolute, incremental, and line by line coverage for
      a file, or None if no EMMA file is known for |file_path|.
    """
    if file_path not in self._source_to_emma:
      return None
    emma_file = self._source_to_emma[file_path]
    total_line_coverage = self._emma_parser.GetLineCoverage(emma_file)

    # Build the lookup set once; membership is tested twice per source line
    # below and |line_numbers| is a list.
    changed_lines = set(line_numbers)
    incremental_line_coverage = [line for line in total_line_coverage
                                 if line.lineno in changed_lines]
    line_by_line_coverage = [
        {
            'line': line.source,
            'coverage': line.covered_status,
            'changed': line.lineno in changed_lines,
            'fractional_coverage': line.fractional_line_coverage,
        }
        for line in total_line_coverage
    ]
    total_covered_lines, total_lines = (
        self.GetSummaryStatsForLines(total_line_coverage))
    incremental_covered_lines, incremental_total_lines = (
        self.GetSummaryStatsForLines(incremental_line_coverage))

    return {
        'absolute': {
            'covered': total_covered_lines,
            'total': total_lines
        },
        'incremental': {
            'covered': incremental_covered_lines,
            'total': incremental_total_lines
        },
        'source': line_by_line_coverage,
    }

  # pylint: disable=no-self-use
  def GetSummaryStatsForLines(self, line_coverage):
    """Gets summary stats for a given list of LineCoverage objects.

    Args:
      line_coverage: A list of LineCoverage objects.

    Returns:
      A tuple containing the number of lines that are covered and the total
      number of lines that are executable, respectively. Partially covered
      lines contribute their fractional coverage to the covered count and
      one line each to the total.
    """
    partially_covered_sum = 0
    covered_status_totals = {COVERED: 0, NOT_COVERED: 0, PARTIALLY_COVERED: 0}
    for line in line_coverage:
      status = line.covered_status
      if status == NOT_EXECUTABLE:
        # Non-executable lines count towards neither figure.
        continue
      covered_status_totals[status] += 1
      if status == PARTIALLY_COVERED:
        partially_covered_sum += line.fractional_line_coverage

    total_covered = covered_status_totals[COVERED] + partially_covered_sum
    total_lines = sum(covered_status_totals.values())
    return total_covered, total_lines

  def _GetSourceFileToEmmaFileDict(self, files):
    """Gets a dict used to correlate Java source files with EMMA HTML files.

    This method gathers the information needed to correlate EMMA HTML
    files with Java source files. EMMA XML and plain text reports do not provide
    line by line coverage data, so HTML reports must be used instead.
    Unfortunately, the HTML files that are created are given garbage names
    (i.e 1.html) so we need to manually correlate EMMA HTML files
    with the original Java source files.

    Args:
      files: A list of file names for which coverage information is desired.

    Returns:
      A dict mapping Java source file paths to EMMA HTML file paths.
    """
    # Maps Java source file paths to package names.
    # Example: /usr/code/file.java -> org.chromium.file.java.
    source_to_package = {}
    for file_path in files:
      package = self.GetPackageNameFromFile(file_path)
      if package:
        source_to_package[file_path] = package
      else:
        logging.warning("Skipping %s because it doesn\'t have a package "
                        "statement.", file_path)

    # Maps package names to EMMA report HTML files.
    # Example: org.chromium.file.java -> out/coverage/1a.html.
    package_to_emma = self._emma_parser.GetPackageNameToEmmaFileDict()
    # Finally, we have a dict mapping Java file paths to EMMA report files.
    # Example: /usr/code/file.java -> out/coverage/1a.html.
    source_to_emma = {source: package_to_emma[package]
                      for source, package in source_to_package.items()
                      if package in package_to_emma}
    return source_to_emma

  @staticmethod
  def NeedsCoverage(file_path):
    """Checks to see if the file needs to be analyzed for code coverage.

    Args:
      file_path: A string representing path to the file.

    Returns:
      True for Java files that exist, False for all others.
    """
    # isfile() rather than exists(): a directory whose name ends in .java
    # cannot be opened and read for its package statement later on.
    is_java = os.path.splitext(file_path)[1] == '.java'
    if is_java and os.path.isfile(file_path):
      return True
    logging.info('Skipping file %s, cannot compute code coverage.', file_path)
    return False

  @staticmethod
  def GetPackageNameFromFile(file_path):
    """Gets the full package name including the file name for a given file path.

    Args:
      file_path: String representing the path to the Java source file.

    Returns:
      A string representing the full package name with file name appended or
      None if there is no package statement in the file.
    """
    with open(file_path) as f:
      file_content = f.read()
    package_match = _EmmaCoverageStats.RE_PACKAGE.search(file_content)
    if not package_match:
      return None
    package = package_match.group(_EmmaCoverageStats.RE_PACKAGE_MATCH_GROUP)
    file_name = os.path.basename(file_path)
    return '%s.%s' % (package, file_name)
425
426
def GenerateCoverageReport(line_coverage_file, out_file_path, coverage_dir):
  """Generates a coverage report for a given set of lines.

  Writes the results of the coverage analysis to the file specified by
  |out_file_path|. An empty JSON object is written when none of the listed
  files needs coverage.

  Args:
    line_coverage_file: The path to a file which contains a dict mapping file
      names to lists of line numbers. Example: {file1: [1, 2, 3], ...} means
      that we should compute coverage information on lines 1 - 3 for file1.
    out_file_path: A string representing the location to write the JSON report.
    coverage_dir: A string representing the file path where the EMMA
      HTML coverage files are located (i.e. folder where index.html is located).
  """
  with open(line_coverage_file) as f:
    potential_files_for_coverage = json.load(f)

  # items() (rather than the Python 2 only iteritems()) works on both
  # Python 2 and Python 3.
  files_for_coverage = {
      path: lines
      for path, lines in potential_files_for_coverage.items()
      if _EmmaCoverageStats.NeedsCoverage(path)
  }

  coverage_results = {}
  if files_for_coverage:
    code_coverage = _EmmaCoverageStats(coverage_dir, list(files_for_coverage))
    coverage_results = code_coverage.GetCoverageDict(files_for_coverage)
  else:
    logging.info('No Java files requiring coverage were included in %s.',
                 line_coverage_file)

  # The report is only written, never read back, so plain 'w' is sufficient.
  with open(out_file_path, 'w') as out_status_file:
    json.dump(coverage_results, out_status_file)
458
459
def main():
  """Parses command line arguments and writes the coverage report.

  Reads --out, --emma-dir and --lines-for-coverage-file from the command line,
  configures logging verbosity from -v, and calls GenerateCoverageReport().
  """
  argparser = argparse.ArgumentParser()
  argparser.add_argument('--out', required=True, type=str,
                         help='Report output file path.')
  argparser.add_argument('--emma-dir', required=True, type=str,
                         help='EMMA HTML report directory.')
  argparser.add_argument('--lines-for-coverage-file', required=True, type=str,
                         help='File containing a JSON object. Should contain a '
                         'dict mapping file names to lists of line numbers of '
                         'code for which coverage information is desired.')
  # default=0: without it argparse yields None when -v is absent, and a None
  # verbosity count cannot be compared against integers on Python 3.
  argparser.add_argument('-v', '--verbose', action='count', default=0,
                         help='Print verbose log information.')
  args = argparser.parse_args()
  run_tests_helper.SetLogLevel(args.verbose)
  devil_chromium.Initialize()
  GenerateCoverageReport(args.lines_for_coverage_file, args.out, args.emma_dir)
476
477
if __name__ == '__main__':
  # Script entry point: hand main()'s return value to the interpreter as the
  # process exit status.
  exit_status = main()
  sys.exit(exit_status)