Ben Murdoch | 097c5b2 | 2016-05-18 11:27:45 +0100 | [diff] [blame] | 1 | #!/usr/bin/python |
| 2 | # Copyright 2015 The Chromium Authors. All rights reserved. |
| 3 | # Use of this source code is governed by a BSD-style license that can be |
| 4 | # found in the LICENSE file. |
| 5 | |
| 6 | """Generates incremental code coverage reports for Java code in Chromium. |
| 7 | |
| 8 | Usage: |
| 9 | |
| 10 | build/android/emma_coverage_stats.py -v --out <output file path> --emma-dir |
| 11 | <EMMA file directory> --lines-for-coverage-file |
| 12 | <path to file containing lines for coverage> |
| 13 | |
| 14 | Creates a JSON representation of the overall and file coverage stats and saves |
| 15 | this information to the specified output file. |
| 16 | """ |
| 17 | |
| 18 | import argparse |
| 19 | import collections |
| 20 | import json |
| 21 | import logging |
| 22 | import os |
| 23 | import re |
| 24 | import sys |
| 25 | from xml.etree import ElementTree |
| 26 | |
| 27 | import devil_chromium |
| 28 | from devil.utils import run_tests_helper |
| 29 | |
# Coverage states for a single line of source. NOT_EXECUTABLE is the fallback
# used for rows that carry none of the recognised coverage CSS classes.
NOT_EXECUTABLE = -1
NOT_COVERED = 0
COVERED = 1
PARTIALLY_COVERED = 2

# Immutable record holding the coverage information for one line of code.
LineCoverage = collections.namedtuple(
    'LineCoverage',
    'lineno source covered_status fractional_line_coverage')
| 39 | |
| 40 | |
class _EmmaHtmlParser(object):
  """Encapsulates HTML file parsing operations.

  This class contains all operations related to parsing HTML files that were
  produced using the EMMA code coverage tool.

  Example HTML:

  Package links:
    <a href="_files/1.html">org.chromium.chrome</a>
    This is returned by the selector |_XPATH_SELECT_PACKAGE_ELEMENTS|.

  Class links:
    <a href="1e.html">DoActivity.java</a>
    This is returned by the selector |_XPATH_SELECT_CLASS_ELEMENTS|.

  Line coverage data:
    <tr class="p">
      <td class="l" title="78% line coverage (7 out of 9)">108</td>
      <td title="78% line coverage (7 out of 9 instructions)">
        if (index < 0 || index >= mSelectors.size()) index = 0;</td>
    </tr>
    <tr>
      <td class="l">109</td>
      <td> </td>
    </tr>
    <tr class="c">
      <td class="l">110</td>
      <td>        if (mSelectors.get(index) != null) {</td>
    </tr>
    <tr class="z">
      <td class="l">111</td>
      <td>            for (int i = 0; i < mSelectors.size(); i++) {</td>
    </tr>
    Each <tr> element is returned by the selector |_XPATH_SELECT_LOC|.

    We can parse this to get:
      1. Line number
      2. Line of source code
      3. Coverage status (c, z, or p)
      4. Fractional coverage value (% out of 100 if PARTIALLY_COVERED)
  """
  # Selector to match all <a> elements within the rows that are in the table
  # that displays all of the different packages.
  _XPATH_SELECT_PACKAGE_ELEMENTS = './/BODY/TABLE[4]/TR/TD/A'

  # Selector to match all <a> elements within the rows that are in the table
  # that displays all of the different classes within a package.
  _XPATH_SELECT_CLASS_ELEMENTS = './/BODY/TABLE[3]/TR/TD/A'

  # Selector to match all <tr> elements within the table containing Java source
  # code in an EMMA HTML file.
  _XPATH_SELECT_LOC = './/BODY/TABLE[4]/TR'

  # Children of HTML elements are represented as a list in ElementTree. These
  # constants represent list indices corresponding to relevant child elements.

  # Child 1 contains percentage covered for a line.
  _ELEMENT_PERCENT_COVERED = 1

  # Child 1 contains the original line of source code.
  _ELEMENT_CONTAINING_SOURCE_CODE = 1

  # Child 0 contains the line number.
  _ELEMENT_CONTAINING_LINENO = 0

  # Maps CSS class names to corresponding coverage constants.
  _CSS_TO_STATUS = {'c': COVERED, 'p': PARTIALLY_COVERED, 'z': NOT_COVERED}

  # Unicode no-break space (U+00A0). Kept as a text string so the replacement
  # below is done on decoded text under both Python 2 and Python 3.
  _NO_BREAK_SPACE = u'\xa0'

  def __init__(self, emma_file_base_dir):
    """Initializes _EmmaHtmlParser.

    Args:
      emma_file_base_dir: Path to the location where EMMA report files are
        stored. Should be where index.html is stored.
    """
    self._base_dir = emma_file_base_dir
    self._emma_files_path = os.path.join(self._base_dir, '_files')
    self._index_path = os.path.join(self._base_dir, 'index.html')

  def GetLineCoverage(self, emma_file_path):
    """Returns a list of LineCoverage objects for the given EMMA HTML file.

    Args:
      emma_file_path: String representing the path to the EMMA HTML file.

    Returns:
      A list of LineCoverage objects, one per <tr> matched by
      |_XPATH_SELECT_LOC|. |fractional_line_coverage| is parsed from the TITLE
      attribute for partially covered lines and is 1.0 for every other status.
    """
    line_coverage = []
    for tr in self._FindElements(emma_file_path, self._XPATH_SELECT_LOC):
      # Rows without a recognised CLASS attribute are not executable.
      coverage_status = self._CSS_TO_STATUS.get(tr.get('CLASS'), NOT_EXECUTABLE)

      if coverage_status == PARTIALLY_COVERED:
        title_attribute = tr[self._ELEMENT_PERCENT_COVERED].get('TITLE')
        # Parse string that contains percent covered: "83% line coverage ...".
        percent_covered = title_attribute.split('%')[0]
        fractional_coverage = int(percent_covered) / 100.0
      else:
        fractional_coverage = 1.0

      lineno_element = tr[self._ELEMENT_CONTAINING_LINENO]
      # Handles oddly formatted HTML (where there is an extra <a> tag): fall
      # back to the text of the first child when the cell has no text itself.
      lineno = int(lineno_element.text or
                   lineno_element[self._ELEMENT_CONTAINING_LINENO].text)

      # An empty cell has text None; treat it as an empty source line instead
      # of raising AttributeError.
      raw_source = tr[self._ELEMENT_CONTAINING_SOURCE_CODE].text or u''
      source = raw_source.replace(self._NO_BREAK_SPACE, u' ')
      if str is bytes:
        # Python 2 only: keep returning UTF-8 encoded byte strings as before.
        source = source.encode('UTF-8')

      line_coverage.append(
          LineCoverage(lineno, source, coverage_status, fractional_coverage))

    return line_coverage

  def GetPackageNameToEmmaFileDict(self):
    """Returns a dict mapping Java packages to EMMA HTML coverage files.

    Parses the EMMA index.html file to get a list of packages, then parses each
    package HTML file to get a list of classes for that package, and creates
    a dict with this info.

    Returns:
      A dict mapping string representation of Java packages (with class
      names appended) to the corresponding file paths of EMMA HTML files.
    """
    # These <a> elements contain each package name and the path of the file
    # where all classes within said package are listed.
    package_link_elements = self._FindElements(
        self._index_path, self._XPATH_SELECT_PACKAGE_ELEMENTS)
    # Maps file path of package directory (EMMA generated) to package name.
    # Example: emma_dir/f.html: org.chromium.chrome.
    package_links = {
        os.path.join(self._base_dir, link.attrib['HREF']): link.text
        for link in package_link_elements if 'HREF' in link.attrib
    }

    package_to_emma = {}
    # items() rather than iteritems() so this also runs on Python 3.
    for package_emma_file_path, package_name in package_links.items():
      # These <a> elements contain each class name in the current package and
      # the path of the file where the coverage info is stored for each class.
      coverage_file_link_elements = self._FindElements(
          package_emma_file_path, self._XPATH_SELECT_CLASS_ELEMENTS)

      for class_name_element in coverage_file_link_elements:
        emma_coverage_file_path = os.path.join(
            self._emma_files_path, class_name_element.attrib['HREF'])
        full_package_name = '%s.%s' % (package_name, class_name_element.text)
        package_to_emma[full_package_name] = emma_coverage_file_path

    return package_to_emma

  # pylint: disable=no-self-use
  def _FindElements(self, file_path, xpath_selector):
    """Reads a HTML file and performs an XPath match.

    Args:
      file_path: String representing the path to the HTML file.
      xpath_selector: String representing xpath search pattern.

    Returns:
      A list of ElementTree.Elements matching the given XPath selector.
      Returns an empty list if there is no match.
    """
    # Read raw bytes so the explicit ISO-8859-1 -> UTF-8 transcoding behaves the
    # same on Python 2 and Python 3 (text-mode read() has no decode() on 3).
    with open(file_path, 'rb') as f:
      raw_contents = f.read()
    utf8_contents = raw_contents.decode('ISO-8859-1').encode('UTF-8')
    root = ElementTree.fromstring(utf8_contents)
    return root.findall(xpath_selector)
| 216 | |
| 217 | |
class _EmmaCoverageStats(object):
  """Computes code coverage stats for Java code using the coverage tool EMMA.

  This class provides an API that allows users to capture absolute code coverage
  and code coverage on a subset of lines for each Java source file. Coverage
  reports are generated in JSON format.
  """
  # Regular expression to get package name from Java package statement.
  RE_PACKAGE_MATCH_GROUP = 'package'
  RE_PACKAGE = re.compile(r'package (?P<%s>[\w.]*);' % RE_PACKAGE_MATCH_GROUP)

  def __init__(self, emma_file_base_dir, files_for_coverage):
    """Initialize _EmmaCoverageStats.

    Args:
      emma_file_base_dir: String representing the path to the base directory
        where EMMA HTML coverage files are stored, i.e. parent of index.html.
      files_for_coverage: A list of Java source code file paths to get EMMA
        coverage for.
    """
    self._emma_parser = _EmmaHtmlParser(emma_file_base_dir)
    self._source_to_emma = self._GetSourceFileToEmmaFileDict(files_for_coverage)

  def GetCoverageDict(self, lines_for_coverage):
    """Returns a dict containing detailed coverage information.

    Gets detailed coverage stats for each file specified in the
    |lines_for_coverage| dict and the total incremental number of lines covered
    and executable for all files in |lines_for_coverage|.

    Args:
      lines_for_coverage: A dict mapping Java source file paths to lists of line
        numbers.

    Returns:
      A dict containing coverage stats for the given dict of files and lines.
      Contains absolute coverage stats for each file, coverage stats for each
      file's lines specified in |lines_for_coverage|, line by line coverage
      for each file, and overall coverage stats for the lines specified in
      |lines_for_coverage|. Files without coverage data are logged and left
      out of the result.
    """
    file_coverage = {}
    # items()/values() rather than iteritems()/itervalues() so this also runs
    # on Python 3.
    for file_path, line_numbers in lines_for_coverage.items():
      file_coverage_dict = self.GetCoverageDictForFile(file_path, line_numbers)
      if file_coverage_dict:
        file_coverage[file_path] = file_coverage_dict
      else:
        logging.warning(
            'No code coverage data for %s, skipping.', file_path)

    covered_statuses = [s['incremental'] for s in file_coverage.values()]
    num_covered_lines = sum(s['covered'] for s in covered_statuses)
    num_total_lines = sum(s['total'] for s in covered_statuses)
    return {
        'files': file_coverage,
        'patch': {
            'incremental': {
                'covered': num_covered_lines,
                'total': num_total_lines
            }
        }
    }

  def GetCoverageDictForFile(self, file_path, line_numbers):
    """Returns a dict containing detailed coverage info for the given file.

    Args:
      file_path: The path to the Java source file that we want to create the
        coverage dict for.
      line_numbers: A list of integer line numbers to retrieve additional stats
        for.

    Returns:
      A dict containing absolute, incremental, and line by line coverage for
      a file, or None if no EMMA file is known for |file_path|.
    """
    if file_path not in self._source_to_emma:
      return None
    emma_file = self._source_to_emma[file_path]
    total_line_coverage = self._emma_parser.GetLineCoverage(emma_file)

    # Build the lookup once: membership is tested for every line of the file,
    # twice, and a list scan per test is needlessly quadratic.
    changed_lines = frozenset(line_numbers)
    incremental_line_coverage = [line for line in total_line_coverage
                                 if line.lineno in changed_lines]
    line_by_line_coverage = [
        {
            'line': line.source,
            'coverage': line.covered_status,
            'changed': line.lineno in changed_lines,
            'fractional_coverage': line.fractional_line_coverage,
        }
        for line in total_line_coverage
    ]
    total_covered_lines, total_lines = (
        self.GetSummaryStatsForLines(total_line_coverage))
    incremental_covered_lines, incremental_total_lines = (
        self.GetSummaryStatsForLines(incremental_line_coverage))

    file_coverage_stats = {
        'absolute': {
            'covered': total_covered_lines,
            'total': total_lines
        },
        'incremental': {
            'covered': incremental_covered_lines,
            'total': incremental_total_lines
        },
        'source': line_by_line_coverage,
    }
    return file_coverage_stats

  # pylint: disable=no-self-use
  def GetSummaryStatsForLines(self, line_coverage):
    """Gets summary stats for a given list of LineCoverage objects.

    Args:
      line_coverage: A list of LineCoverage objects.

    Returns:
      A tuple containing the number of lines that are covered and the total
      number of lines that are executable, respectively. Partially covered
      lines contribute their fractional coverage to the covered count, so it
      may be a float.
    """
    partially_covered_sum = 0
    covered_status_totals = {COVERED: 0, NOT_COVERED: 0, PARTIALLY_COVERED: 0}
    for line in line_coverage:
      status = line.covered_status
      if status == NOT_EXECUTABLE:
        continue
      covered_status_totals[status] += 1
      if status == PARTIALLY_COVERED:
        partially_covered_sum += line.fractional_line_coverage

    total_covered = covered_status_totals[COVERED] + partially_covered_sum
    total_lines = sum(covered_status_totals.values())
    return total_covered, total_lines

  def _GetSourceFileToEmmaFileDict(self, files):
    """Gets a dict used to correlate Java source files with EMMA HTML files.

    This method gathers the information needed to correlate EMMA HTML
    files with Java source files. EMMA XML and plain text reports do not provide
    line by line coverage data, so HTML reports must be used instead.
    Unfortunately, the HTML files that are created are given garbage names
    (i.e 1.html) so we need to manually correlate EMMA HTML files
    with the original Java source files.

    Args:
      files: A list of file names for which coverage information is desired.

    Returns:
      A dict mapping Java source file paths to EMMA HTML file paths. Files
      without a package statement, or whose package is absent from the EMMA
      report, are omitted.
    """
    # Maps Java source file paths to package names.
    # Example: /usr/code/file.java -> org.chromium.file.java.
    source_to_package = {}
    for file_path in files:
      package = self.GetPackageNameFromFile(file_path)
      if package:
        source_to_package[file_path] = package
      else:
        logging.warning("Skipping %s because it doesn't have a package "
                        "statement.", file_path)

    # Maps package names to EMMA report HTML files.
    # Example: org.chromium.file.java -> out/coverage/1a.html.
    package_to_emma = self._emma_parser.GetPackageNameToEmmaFileDict()
    # Finally, we have a dict mapping Java file paths to EMMA report files.
    # Example: /usr/code/file.java -> out/coverage/1a.html.
    source_to_emma = {source: package_to_emma[package]
                      for source, package in source_to_package.items()
                      if package in package_to_emma}
    return source_to_emma

  @staticmethod
  def NeedsCoverage(file_path):
    """Checks to see if the file needs to be analyzed for code coverage.

    Args:
      file_path: A string representing path to the file.

    Returns:
      True for Java files that exist, False for all others.
    """
    if os.path.splitext(file_path)[1] == '.java' and os.path.exists(file_path):
      return True
    logging.info('Skipping file %s, cannot compute code coverage.', file_path)
    return False

  @staticmethod
  def GetPackageNameFromFile(file_path):
    """Gets the full package name including the file name for a given file path.

    Args:
      file_path: String representing the path to the Java source file.

    Returns:
      A string representing the full package name with file name appended or
      None if there is no package statement in the file.
    """
    with open(file_path) as f:
      file_content = f.read()
    package_match = _EmmaCoverageStats.RE_PACKAGE.search(file_content)
    if not package_match:
      return None
    package = package_match.group(_EmmaCoverageStats.RE_PACKAGE_MATCH_GROUP)
    file_name = os.path.basename(file_path)
    return '%s.%s' % (package, file_name)
| 425 | |
| 426 | |
def GenerateCoverageReport(line_coverage_file, out_file_path, coverage_dir):
  """Generates a coverage report for a given set of lines.

  Writes the results of the coverage analysis to the file specified by
  |out_file_path|. If none of the listed files needs coverage, an empty JSON
  object is written.

  Args:
    line_coverage_file: The path to a file which contains a dict mapping file
      names to lists of line numbers. Example: {file1: [1, 2, 3], ...} means
      that we should compute coverage information on lines 1 - 3 for file1.
    out_file_path: A string representing the location to write the JSON report.
    coverage_dir: A string representing the file path where the EMMA
      HTML coverage files are located (i.e. folder where index.html is located).
  """
  with open(line_coverage_file) as f:
    potential_files_for_coverage = json.load(f)

  # items() rather than iteritems() so this also runs on Python 3.
  files_for_coverage = {
      path: lines
      for path, lines in potential_files_for_coverage.items()
      if _EmmaCoverageStats.NeedsCoverage(path)
  }

  coverage_results = {}
  if files_for_coverage:
    # Pass a real list of paths, as _EmmaCoverageStats documents.
    code_coverage = _EmmaCoverageStats(coverage_dir, list(files_for_coverage))
    coverage_results = code_coverage.GetCoverageDict(files_for_coverage)
  else:
    logging.info('No Java files requiring coverage were included in %s.',
                 line_coverage_file)

  # The report is only written, never read back, so plain 'w' is sufficient.
  with open(out_file_path, 'w') as out_status_file:
    json.dump(coverage_results, out_status_file)
| 458 | |
| 459 | |
def main():
  """Parses the command line flags and generates the coverage report."""
  parser = argparse.ArgumentParser()
  parser.add_argument(
      '--out', required=True, type=str, help='Report output file path.')
  parser.add_argument(
      '--emma-dir', required=True, type=str,
      help='EMMA HTML report directory.')
  parser.add_argument(
      '--lines-for-coverage-file', required=True, type=str,
      help=('File containing a JSON object. Should contain a dict mapping '
            'file names to lists of line numbers of code for which coverage '
            'information is desired.'))
  parser.add_argument(
      '-v', '--verbose', action='count',
      help='Print verbose log information.')
  options = parser.parse_args()

  # Set the log level from -v and initialize devil_chromium before the report
  # is generated.
  run_tests_helper.SetLogLevel(options.verbose)
  devil_chromium.Initialize()
  GenerateCoverageReport(
      options.lines_for_coverage_file, options.out, options.emma_dir)


if __name__ == '__main__':
  sys.exit(main())