#!/usr/bin/env python
# Copyright 2016 the V8 project authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Script for merging sancov files in parallel.

When merging test runner output, the sancov files are expected
to be located in one directory with the file-name pattern:
<executable name>.test.<id>.sancov

For each executable, this script writes a new file:
<executable name>.result.sancov

When --swarming-output-dir is specified, this script will merge the result
files found there into the coverage folder.

The sancov tool is expected to be in the llvm compiler-rt third-party
directory. It's not checked out by default and must be added via custom_deps:
'v8/third_party/llvm/projects/compiler-rt':
    'https://chromium.googlesource.com/external/llvm.org/compiler-rt.git'
"""

import argparse
import logging
import math
import os
import re
import subprocess
import sys

from multiprocessing import Pool, cpu_count


logging.basicConfig(level=logging.INFO)

# V8 checkout directory.
BASE_DIR = os.path.dirname(os.path.dirname(os.path.dirname(
    os.path.abspath(__file__))))

# The sancov tool location.
SANCOV_TOOL = os.path.join(
    BASE_DIR, 'third_party', 'llvm', 'projects', 'compiler-rt',
    'lib', 'sanitizer_common', 'scripts', 'sancov.py')

# Number of cpus.
CPUS = cpu_count()

# Regexp to find sancov files as output by the v8 test runner. Also grabs the
# executable name in group 1.
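# E.g. 'd8.test.42.sancov' matches, with executable name 'd8'.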
SANCOV_FILE_RE = re.compile(r'^(.*)\.test\.\d+\.sancov$')

# Regexp to find sancov result files as returned from swarming.
SANCOV_RESULTS_FILE_RE = re.compile(r'^.*\.result\.sancov$')


def merge(args):
  """Merge several sancov files into one.

  Called through a multiprocessing pool. The args are expected to unpack to:
    keep: Whether source and intermediate sancov files should be kept.
    coverage_dir: Folder containing the sancov files.
    executable: Name of the executable whose sancov files should be merged.
    index: A number to be put into the intermediate result file name.
           If None, this is a final result.
    bucket: The list of sancov files to be merged.
  Returns: A tuple with the executable name and the result file name.
  """
  keep, coverage_dir, executable, index, bucket = args
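  # The sancov tool prints the merged coverage data to stdout; capture it
  # here and write it to the result file below.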
  process = subprocess.Popen(
      [SANCOV_TOOL, 'merge'] + bucket,
      stdout=subprocess.PIPE,
      stderr=subprocess.PIPE,
      cwd=coverage_dir,
  )
  output, _ = process.communicate()
  assert process.returncode == 0
  if index is not None:
    # This is an intermediate result; add the bucket index to the file name.
    result_file_name = '%s.result.%d.sancov' % (executable, index)
  else:
    # This is the final result without bucket index.
    result_file_name = '%s.result.sancov' % executable
  with open(os.path.join(coverage_dir, result_file_name), "wb") as f:
    f.write(output)
  if not keep:
    for f in bucket:
      os.remove(os.path.join(coverage_dir, f))
  return executable, result_file_name


def generate_inputs(keep, coverage_dir, file_map, cpus):
  """Generate inputs for multiprocessed merging.

  Splits the sancov files into several buckets, so that each bucket can be
  merged in a separate process. We have only a few executables in total,
  each typically with many associated files. In the general case, with many
  executables, we might need to avoid splitting buckets of executables with
  few files.

  Returns: List of args as expected by merge above.
  """
  inputs = []
  for executable, files in file_map.iteritems():
    # What's the bucket size for distributing files for merging? E.g. with
    # 2 cpus and 9 files we want bucket size 5.
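    # The minimum bucket size of 2 avoids degenerate single-file merge jobs
    # when there are more cpus than files.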
    n = max(2, int(math.ceil(len(files) / float(cpus))))

    # Chop files into buckets.
    buckets = [files[i:i+n] for i in xrange(0, len(files), n)]

    # Inputs for multiprocessing. List of tuples containing:
    # Keep-files option, base path, executable name, index of bucket,
    # list of files.
    inputs.extend([(keep, coverage_dir, executable, i, b)
                   for i, b in enumerate(buckets)])
  return inputs


def merge_parallel(inputs, merge_fun=merge):
  """Process several merge jobs in parallel."""
  pool = Pool(CPUS)
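  # Always close the pool so the worker processes are released, even if a
  # merge job raises.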
  try:
    return pool.map(merge_fun, inputs)
  finally:
    pool.close()


def merge_test_runner_output(options):
  # Map executable names to their respective sancov files.
  file_map = {}
  for f in os.listdir(options.coverage_dir):
    match = SANCOV_FILE_RE.match(f)
    if match:
      file_map.setdefault(match.group(1), []).append(f)

  inputs = generate_inputs(
      options.keep, options.coverage_dir, file_map, CPUS)

  logging.info('Executing %d merge jobs in parallel for %d executables.' %
               (len(inputs), len(file_map)))

  results = merge_parallel(inputs)

  # Map executable names to intermediate bucket result files.
  file_map = {}
  for executable, f in results:
    file_map.setdefault(executable, []).append(f)

  # Merge the bucket results for each executable.
  # The final result has index None, so no index will appear in the
  # file name.
  inputs = [(options.keep, options.coverage_dir, executable, None, files)
            for executable, files in file_map.iteritems()]

  logging.info('Merging %d intermediate results.' % len(inputs))

  merge_parallel(inputs)


def merge_two(args):
  """Merge two sancov files.

  Called through a multiprocessing pool. The args are expected to unpack to:
    swarming_output_dir: Folder containing the new file.
    coverage_dir: Folder containing the existing file.
    f: File name of the file to be merged.
  """
  swarming_output_dir, coverage_dir, f = args
  input_file = os.path.join(swarming_output_dir, f)
  output_file = os.path.join(coverage_dir, f)
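  # Merge the file from swarming with the existing file of the same name;
  # the merged data is written back to the file in the coverage folder.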
  process = subprocess.Popen(
      [SANCOV_TOOL, 'merge', input_file, output_file],
      stdout=subprocess.PIPE,
      stderr=subprocess.PIPE,
  )
  output, _ = process.communicate()
  assert process.returncode == 0
  with open(output_file, "wb") as f:
    f.write(output)


def merge_swarming_output(options):
  # Iterate over the sancov result files from swarming.
  files = []
  for f in os.listdir(options.swarming_output_dir):
    match = SANCOV_RESULTS_FILE_RE.match(f)
    if match:
      if os.path.exists(os.path.join(options.coverage_dir, f)):
        # If the same file already exists, we'll merge the data.
        files.append(f)
      else:
        # No file yet? Just move it.
        os.rename(os.path.join(options.swarming_output_dir, f),
                  os.path.join(options.coverage_dir, f))

  inputs = [(options.swarming_output_dir, options.coverage_dir, f)
            for f in files]

  logging.info('Executing %d merge jobs in parallel.' % len(inputs))
  merge_parallel(inputs, merge_two)


def main():
  parser = argparse.ArgumentParser()
  parser.add_argument('--coverage-dir', required=True,
                      help='Path to the sancov output files.')
  parser.add_argument('--keep', default=False, action='store_true',
                      help='Keep sancov output files after merging.')
  parser.add_argument('--swarming-output-dir',
                      help='Folder containing a results shard from swarming.')
  options = parser.parse_args()

  # Check if folder with coverage output exists.
  assert (os.path.exists(options.coverage_dir) and
          os.path.isdir(options.coverage_dir))

  if options.swarming_output_dir:
    # Check if folder with swarming output exists.
    assert (os.path.exists(options.swarming_output_dir) and
            os.path.isdir(options.swarming_output_dir))
    merge_swarming_output(options)
  else:
    merge_test_runner_output(options)

  return 0


if __name__ == '__main__':
  sys.exit(main())