blob: ac2221e697b57d0cfe20c0db0f06f93107dd3029 [file] [log] [blame]
mbligh96cf0512008-04-17 15:25:38 +00001#!/usr/bin/python -u
mblighc2514542008-02-19 15:54:26 +00002
Aviv Keshet687d2dc2016-10-20 15:41:16 -07003import collections
Fang Deng49822682014-10-21 16:29:22 -07004import datetime
Aviv Keshet687d2dc2016-10-20 15:41:16 -07005import errno
6import fcntl
Simran Basi1e10e922015-04-16 15:09:56 -07007import json
Aviv Keshet687d2dc2016-10-20 15:41:16 -07008import optparse
9import os
10import socket
Shuqian Zhao31425d52016-12-07 09:35:03 -080011import subprocess
Aviv Keshet687d2dc2016-10-20 15:41:16 -070012import sys
Dan Shi11e35062017-11-03 10:09:05 -070013import time
Aviv Keshet687d2dc2016-10-20 15:41:16 -070014import traceback
mblighbb7b8912006-10-08 03:59:02 +000015
mbligh96cf0512008-04-17 15:25:38 +000016import common
Dan Shi4f8c0242017-07-07 15:34:49 -070017from autotest_lib.client.bin.result_tools import utils as result_utils
18from autotest_lib.client.bin.result_tools import utils_lib as result_utils_lib
19from autotest_lib.client.bin.result_tools import runner as result_runner
20from autotest_lib.client.common_lib import control_data
Benny Peakefeb775c2017-02-08 15:14:14 -080021from autotest_lib.client.common_lib import global_config
jadmanskidb4f9b52008-12-03 22:52:53 +000022from autotest_lib.client.common_lib import mail, pidfile
Fang Deng49822682014-10-21 16:29:22 -070023from autotest_lib.client.common_lib import utils
Fang Deng49822682014-10-21 16:29:22 -070024from autotest_lib.frontend import setup_django_environment
Fang Deng9ec66802014-04-28 19:04:33 +000025from autotest_lib.frontend.tko import models as tko_models
Shuqian Zhao19e62fb2017-01-09 10:10:14 -080026from autotest_lib.server import site_utils
Fang Deng49822682014-10-21 16:29:22 -070027from autotest_lib.server.cros.dynamic_suite import constants
28from autotest_lib.site_utils import job_overhead
Benny Peaked322d3d2017-02-08 15:39:28 -080029from autotest_lib.site_utils.sponge_lib import sponge_utils
Dennis Jeffreyf9bef6c2013-08-05 11:01:27 -070030from autotest_lib.tko import db as tko_db, utils as tko_utils
Luigi Semenzatoe7064812017-02-03 14:47:59 -080031from autotest_lib.tko import models, parser_lib
Dennis Jeffreyf9bef6c2013-08-05 11:01:27 -070032from autotest_lib.tko.perf_upload import perf_uploader
mbligh74fc0462007-11-05 20:24:17 +000033
Dan Shib0af6212017-07-17 14:40:02 -070034try:
35 from chromite.lib import metrics
36except ImportError:
37 metrics = utils.metrics_mock
38
39
# Bundle of command-line flags threaded from main() down through parse_path()
# into parse_one(); see parse_args() for the meaning of each field.
_ParseOptions = collections.namedtuple(
    'ParseOptions', ['reparse', 'mail_on_failure', 'dry_run', 'suite_report',
                     'datastore_creds', 'export_to_gcloud_path'])
Aviv Keshet687d2dc2016-10-20 15:41:16 -070043
def parse_args():
    """Parse command line arguments.

    Also fills in defaults for --datastore-creds (from shadow_config) and
    --export-to-gcloud-path (by probing well-known filesystem locations)
    when they are not given explicitly.

    @return: A tuple (options, args) as returned by optparse, where args
            holds the positional results-directory arguments.
    """
    # build up our options parser and parse sys.argv
    parser = optparse.OptionParser()
    parser.add_option("-m", help="Send mail for FAILED tests",
                      dest="mailit", action="store_true")
    parser.add_option("-r", help="Reparse the results of a job",
                      dest="reparse", action="store_true")
    parser.add_option("-o", help="Parse a single results directory",
                      dest="singledir", action="store_true")
    parser.add_option("-l", help=("Levels of subdirectories to include "
                                  "in the job name"),
                      type="int", dest="level", default=1)
    parser.add_option("-n", help="No blocking on an existing parse",
                      dest="noblock", action="store_true")
    parser.add_option("-s", help="Database server hostname",
                      dest="db_host", action="store")
    parser.add_option("-u", help="Database username", dest="db_user",
                      action="store")
    parser.add_option("-p", help="Database password", dest="db_pass",
                      action="store")
    parser.add_option("-d", help="Database name", dest="db_name",
                      action="store")
    parser.add_option("--dry-run", help="Do not actually commit any results.",
                      dest="dry_run", action="store_true", default=False)
    parser.add_option(
            "--detach", action="store_true",
            help="Detach parsing process from the caller process. Used by "
                 "monitor_db to safely restart without affecting parsing.",
            default=False)
    parser.add_option("--write-pidfile",
                      help="write pidfile (.parser_execute)",
                      dest="write_pidfile", action="store_true",
                      default=False)
    parser.add_option("--record-duration",
                      help="[DEPRECATED] Record timing to metadata db",
                      dest="record_duration", action="store_true",
                      default=False)
    parser.add_option("--suite-report",
                      help=("Allows parsing job to attempt to create a suite "
                            "timeline report, if it detects that the job being "
                            "parsed is a suite job."),
                      dest="suite_report", action="store_true",
                      default=False)
    parser.add_option("--datastore-creds",
                      help=("The path to gcloud datastore credentials file, "
                            "which will be used to upload suite timeline "
                            "report to gcloud. If not specified, the one "
                            "defined in shadow_config will be used."),
                      dest="datastore_creds", action="store", default=None)
    parser.add_option("--export-to-gcloud-path",
                      help=("The path to export_to_gcloud script. Please find "
                            "chromite path on your server. The script is under "
                            "chromite/bin/."),
                      dest="export_to_gcloud_path", action="store",
                      default=None)
    options, args = parser.parse_args()

    # we need a results directory
    if not args:
        tko_utils.dprint("ERROR: at least one results directory must "
                         "be provided")
        parser.print_help()
        sys.exit(1)

    if not options.datastore_creds:
        gcloud_creds = global_config.global_config.get_config_value(
                'GCLOUD', 'cidb_datastore_writer_creds', default=None)
        options.datastore_creds = (site_utils.get_creds_abspath(gcloud_creds)
                                   if gcloud_creds else None)

    if not options.export_to_gcloud_path:
        export_script = 'chromiumos/chromite/bin/export_to_gcloud'
        # Compute each candidate location once instead of twice.
        # If it is a lab server, the script is under ~chromeos-test/
        lab_path = os.path.expanduser('~chromeos-test/%s' % export_script)
        # If it is a local workstation, it is probably under ~/
        home_path = os.path.expanduser('~/%s' % export_script)
        if os.path.exists(lab_path):
            path = lab_path
        elif os.path.exists(home_path):
            path = home_path
        # If it is not found anywhere, the default will be set to None.
        else:
            path = None
        options.export_to_gcloud_path = path

    # pass the options back
    return options, args
mbligh74fc0462007-11-05 20:24:17 +0000131
132
def format_failure_message(jobname, kernel, testname, status, reason):
    """Format failure message with the given information.

    Produces a single space-separated row with fixed-width, left-justified
    columns for everything except the free-form reason text.

    @param jobname: String representing the job name.
    @param kernel: String representing the kernel.
    @param testname: String representing the test name.
    @param status: String representing the test status.
    @param reason: String representing the reason.

    @return: Failure message as a string.
    """
    # Column widths for the fixed-width fields, in display order.
    widths = (12, 20, 12, 10)
    fields = (jobname, kernel, testname, status)
    columns = ["%-*s" % (width, field)
               for width, field in zip(widths, fields)]
    columns.append("%s" % (reason,))
    return " ".join(columns)
mblighb85e6b02006-10-08 17:20:56 +0000146
mblighbb7b8912006-10-08 03:59:02 +0000147
def mailfailure(jobname, job, message):
    """Send an email about the failure.

    @param jobname: String representing the job name.
    @param job: A job object.
    @param message: The message to mail.
    """
    # Assemble the fixed header: a results link plus a column-header row
    # and an underline row, both laid out by format_failure_message().
    header_lines = [
        "",
        "The following tests FAILED for this job",
        "http://%s/results/%s" % (socket.gethostname(), jobname),
        "",
        format_failure_message("Job name", "Kernel", "Test name",
                               "FAIL/WARN", "Failure reason"),
        format_failure_message("=" * 8, "=" * 6, "=" * 8,
                               "=" * 8, "=" * 14),
    ]
    message_header = "\n".join(header_lines)
    subject = "AUTOTEST: FAILED tests from job %s" % jobname
    mail.send("", job.user, "", subject, message_header + message)
mbligh006f2302007-09-13 20:46:46 +0000169
170
def _invalidate_original_tests(orig_job_idx, retry_job_idx):
    """Retry tests invalidates original tests.

    Whenever a retry job is complete, we want to invalidate the original
    job's test results, such that the consumers of the tko database
    (e.g. tko frontend, wmatrix) could figure out which results are the latest.

    When a retry job is parsed, we retrieve the original job's afe_job_id
    from the retry job's keyvals, which is then converted to tko job_idx and
    passed into this method as |orig_job_idx|.

    In this method, we are going to invalidate the rows in tko_tests that are
    associated with the original job by flipping their 'invalid' bit to True.
    In addition, in tko_tests, we also maintain a pointer from the retry results
    to the original results, so that later we can always know which rows in
    tko_tests are retries and which are the corresponding original results.
    This is done by setting the field 'invalidates_test_idx' of the tests
    associated with the retry job.

    For example, assume Job(job_idx=105) are retried by Job(job_idx=108), after
    this method is run, their tko_tests rows will look like:
    __________________________________________________________________________
    test_idx| job_idx | test            | ... | invalid | invalidates_test_idx
    10      | 105     | dummy_Fail.Error| ... | 1       | NULL
    11      | 105     | dummy_Fail.Fail | ... | 1       | NULL
    ...
    20      | 108     | dummy_Fail.Error| ... | 0       | 10
    21      | 108     | dummy_Fail.Fail | ... | 0       | 11
    __________________________________________________________________________
    Note the invalid bits of the rows for Job(job_idx=105) are set to '1'.
    And the 'invalidates_test_idx' fields of the rows for Job(job_idx=108)
    are set to 10 and 11 (the test_idx of the rows for the original job).

    @param orig_job_idx: An integer representing the original job's
                         tko job_idx. Tests associated with this job will
                         be marked as 'invalid'.
    @param retry_job_idx: An integer representing the retry job's
                          tko job_idx. The field 'invalidates_test_idx'
                          of the tests associated with this job will be updated.

    """
    msg = 'orig_job_idx: %s, retry_job_idx: %s' % (orig_job_idx, retry_job_idx)
    if not orig_job_idx or not retry_job_idx:
        tko_utils.dprint('ERROR: Could not invalidate tests: ' + msg)
        # Bug fix: bail out here. Previously this fell through and ran the
        # queries below with a missing job_idx, despite logging an error.
        return
    # Using django models here makes things easier, but make sure that
    # before this method is called, all other relevant transactions have been
    # committed to avoid race condition. In the long run, we might consider
    # to make the rest of parser use django models.
    orig_tests = tko_models.Test.objects.filter(job__job_idx=orig_job_idx)
    retry_tests = tko_models.Test.objects.filter(job__job_idx=retry_job_idx)

    # Invalidate original tests.
    orig_tests.update(invalid=True)

    # Maintain a dictionary that maps (test, subdir) to original tests.
    # Note that within the scope of a job, (test, subdir) uniquelly
    # identifies a test run, but 'test' does not.
    # In a control file, one could run the same test with different
    # 'subdir_tag', for example,
    #   job.run_test('dummy_Fail', tag='Error', subdir_tag='subdir_1')
    #   job.run_test('dummy_Fail', tag='Error', subdir_tag='subdir_2')
    # In tko, we will get
    #   (test='dummy_Fail.Error', subdir='dummy_Fail.Error.subdir_1')
    #   (test='dummy_Fail.Error', subdir='dummy_Fail.Error.subdir_2')
    invalidated_tests = {(orig_test.test, orig_test.subdir): orig_test
                         for orig_test in orig_tests}
    for retry in retry_tests:
        # It is possible that (retry.test, retry.subdir) doesn't exist
        # in invalidated_tests. This could happen when the original job
        # didn't run some of its tests. For example, a dut goes offline
        # since the beginning of the job, in which case invalidated_tests
        # will only have one entry for 'SERVER_JOB'.
        orig_test = invalidated_tests.get((retry.test, retry.subdir), None)
        if orig_test:
            retry.invalidates_test = orig_test
            retry.save()
    tko_utils.dprint('DEBUG: Invalidated tests associated to job: ' + msg)
248
249
def _throttle_result_size(path):
    """Limit the total size of test results for the given path.

    Reads the test's max result size from its control file (falling back to
    the global default) and asks result_utils to trim the directory down to
    that size. Best-effort: any throttling failure is logged, not raised.

    @param path: Path of the result directory.
    """
    if not result_runner.ENABLE_RESULT_THROTTLING:
        tko_utils.dprint(
                'Result throttling is not enabled. Skipping throttling %s' %
                path)
        return

    max_result_size_KB = control_data.DEFAULT_MAX_RESULT_SIZE_KB
    # Client side test saves the test control to file `control`, while server
    # side test saves the test control to file `control.srv`
    for control_file in ['control', 'control.srv']:
        control = os.path.join(path, control_file)
        try:
            max_result_size_KB = control_data.parse_control(
                    control, raise_warnings=False).max_result_size_KB
            # Any value different from the default is considered to be the one
            # set in the test control file.
            if max_result_size_KB != control_data.DEFAULT_MAX_RESULT_SIZE_KB:
                break
        except IOError as e:
            tko_utils.dprint(
                    'Failed to access %s. Error: %s\nDetails %s' %
                    (control, e, traceback.format_exc()))
        except control_data.ControlVariableException as e:
            tko_utils.dprint(
                    'Failed to parse %s. Error: %s\nDetails %s' %
                    (control, e, traceback.format_exc()))

    try:
        result_utils.execute(path, max_result_size_KB)
    # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt are
    # no longer swallowed; throttling failures are still only logged.
    except Exception:
        tko_utils.dprint(
                'Failed to throttle result size of %s.\nDetails %s' %
                (path, traceback.format_exc()))
288
289
def export_tko_job_to_file(job, jobname, filename):
    """Exports the tko job to disk file.

    Serializes |job| into a protobuf-backed binary file via
    tko.job_serializer. Best-effort: if the generated tko_pb2 module is not
    available the job_serializer import fails and only a warning is printed.

    @param job: database object.
    @param jobname: the job name as string.
    @param filename: The path to the results to be parsed.
    """
    try:
        # Imported lazily inside the try so a missing generated tko_pb2
        # module (an ImportError) degrades to a warning instead of crashing.
        from autotest_lib.tko import job_serializer

        serializer = job_serializer.JobSerializer()
        serializer.serialize_to_binary(job, jobname, filename)
    except ImportError:
        tko_utils.dprint("WARNING: tko_pb2.py doesn't exist. Create by "
                         "compiling tko/tko.proto.")
305
306
def parse_one(db, pid_file_manager, jobname, path, parse_options):
    """Parse a single job. Optionally send email on failure.

    Pipeline: parse the status log under |path| into tko objects, throttle
    the on-disk result size, write everything to the tko database (skipped
    under dry_run), upload perf values and Sponge results, optionally dump a
    suite timeline report, and finally record gs_offloader instructions.

    @param db: database object.
    @param pid_file_manager: pidfile.PidFileManager object. Its
            num_tests_failed counter is incremented once per non-GOOD test.
    @param jobname: the tag used to search for existing job in db,
                    e.g. '1234-chromeos-test/host1'
    @param path: The path to the results to be parsed.
    @param parse_options: _ParseOptions instance.
    """
    reparse = parse_options.reparse
    mail_on_failure = parse_options.mail_on_failure
    dry_run = parse_options.dry_run
    suite_report = parse_options.suite_report
    datastore_creds = parse_options.datastore_creds
    export_to_gcloud_path = parse_options.export_to_gcloud_path

    tko_utils.dprint("\nScanning %s (%s)" % (jobname, path))
    # A non-None index means this job already exists in the DB.
    old_job_idx = db.find_job(jobname)
    if old_job_idx is not None and not reparse:
        tko_utils.dprint("! Job is already parsed, done")
        return

    # look up the status version
    job_keyval = models.job.read_keyval(path)
    status_version = job_keyval.get("status_version", 0)

    parser = parser_lib.parser(status_version)
    job = parser.make_job(path)
    tko_utils.dprint("+ Parsing dir=%s, jobname=%s" % (path, jobname))
    status_log_path = _find_status_log_path(path)
    if not status_log_path:
        tko_utils.dprint("! Unable to parse job, no status file")
        return
    _parse_status_log(parser, job, status_log_path)

    # On reparse, reuse the existing job row and drop DB tests that no
    # longer appear in the freshly parsed results.
    if old_job_idx is not None:
        job.job_idx = old_job_idx
        unmatched_tests = _match_existing_tests(db, job)
        if not dry_run:
            _delete_tests_from_db(db, unmatched_tests)

    job.afe_job_id = tko_utils.get_afe_job_id(jobname)
    job.skylab_task_id = tko_utils.get_skylab_task_id(jobname)
    # Both the AFE and skylab parent fields are populated from the same
    # PARENT_JOB_ID keyval.
    job.afe_parent_job_id = job_keyval.get(constants.PARENT_JOB_ID)
    job.skylab_parent_task_id = job_keyval.get(constants.PARENT_JOB_ID)
    job.build = None
    job.board = None
    job.build_version = None
    job.suite = None
    if job.label:
        label_info = site_utils.parse_job_name(job.label)
        if label_info:
            job.build = label_info.get('build', None)
            job.build_version = label_info.get('build_version', None)
            job.board = label_info.get('board', None)
            job.suite = label_info.get('suite', None)

    # Route result_tools logging through the parser's debug printer.
    result_utils_lib.LOG = tko_utils.dprint
    _throttle_result_size(path)

    # Record test result size to job_keyvals
    start_time = time.time()
    result_size_info = site_utils.collect_result_sizes(
            path, log=tko_utils.dprint)
    tko_utils.dprint('Finished collecting result sizes after %s seconds' %
                     (time.time()-start_time))
    job.keyval_dict.update(result_size_info.__dict__)

    # TODO(dshi): Update sizes with sponge_invocation.xml and throttle it.

    # check for failures
    message_lines = [""]
    job_successful = True
    for test in job.tests:
        if not test.subdir:
            continue
        tko_utils.dprint("* testname, subdir, status, reason: %s %s %s %s"
                         % (test.testname, test.subdir, test.status,
                            test.reason))
        if test.status != 'GOOD':
            job_successful = False
            # Side effect: the caller's pidfile accumulates the failure count.
            pid_file_manager.num_tests_failed += 1
            message_lines.append(format_failure_message(
                jobname, test.kernel.base, test.subdir,
                test.status, test.reason))
    # All DB/upload work below is wrapped so any failure is logged with a
    # traceback before being re-raised to the caller.
    try:
        message = "\n".join(message_lines)

        if not dry_run:
            # send out a email report of failure
            # message_lines starts with "", so len > 2 means at least one
            # real failure line was appended above.
            if len(message) > 2 and mail_on_failure:
                tko_utils.dprint("Sending email report of failure on %s to %s"
                                 % (jobname, job.user))
                mailfailure(jobname, job, message)

            # Upload perf values to the perf dashboard, if applicable.
            for test in job.tests:
                perf_uploader.upload_test(job, test, jobname)

            # Upload job details to Sponge.
            sponge_url = sponge_utils.upload_results(job, log=tko_utils.dprint)
            if sponge_url:
                job.keyval_dict['sponge_url'] = sponge_url

            _write_job_to_db(db, jobname, job)

            # Verify the job data is written to the database.
            if job.tests:
                tests_in_db = db.find_tests(job.job_idx)
                tests_in_db_count = len(tests_in_db) if tests_in_db else 0
                if tests_in_db_count != len(job.tests):
                    tko_utils.dprint(
                            'Failed to find enough tests for job_idx: %d. The '
                            'job should have %d tests, only found %d tests.' %
                            (job.job_idx, len(job.tests), tests_in_db_count))
                    metrics.Counter(
                            'chromeos/autotest/result/db_save_failure',
                            description='The number of times parse failed to '
                            'save job to TKO database.').increment()

            # Although the cursor has autocommit, we still need to force it to
            # commit existing changes before we can use django models, otherwise
            # it will go into deadlock when django models try to start a new
            # trasaction while the current one has not finished yet.
            db.commit()

            # Handle retry job.
            orig_afe_job_id = job_keyval.get(constants.RETRY_ORIGINAL_JOB_ID,
                                             None)
            if orig_afe_job_id:
                orig_job_idx = tko_models.Job.objects.get(
                        afe_job_id=orig_afe_job_id).job_idx
                _invalidate_original_tests(orig_job_idx, job.job_idx)
    except Exception as e:
        tko_utils.dprint("Hit exception while uploading to tko db:\n%s" %
                         traceback.format_exc())
        # NOTE(review): `raise e` discards the original traceback on
        # python 2; a bare `raise` would preserve it.
        raise e

    # Serializing job into a binary file
    export_tko_to_file = global_config.global_config.get_config_value(
            'AUTOSERV', 'export_tko_job_to_file', type=bool, default=False)

    binary_file_name = os.path.join(path, "job.serialize")
    if export_tko_to_file:
        export_tko_job_to_file(job, jobname, binary_file_name)

    if not dry_run:
        db.commit()

    # Generate a suite report.
    # Check whether this is a suite job, a suite job will be a hostless job, its
    # jobname will be <JOB_ID>-<USERNAME>/hostless, the suite field will not be
    # NULL. Only generate timeline report when datastore_parent_key is given.
    try:
        datastore_parent_key = job_keyval.get('datastore_parent_key', None)
        provision_job_id = job_keyval.get('provision_job_id', None)
        if (suite_report and jobname.endswith('/hostless')
            and job.suite and datastore_parent_key):
            tko_utils.dprint('Start dumping suite timing report...')
            timing_log = os.path.join(path, 'suite_timing.log')
            dump_cmd = ("%s/site_utils/dump_suite_report.py %s "
                        "--output='%s' --debug" %
                        (common.autotest_dir, job.afe_job_id,
                         timing_log))

            if provision_job_id is not None:
                dump_cmd += " --provision_job_id=%d" % int(provision_job_id)

            subprocess.check_output(dump_cmd, shell=True)
            tko_utils.dprint('Successfully finish dumping suite timing report')

            if (datastore_creds and export_to_gcloud_path
                and os.path.exists(export_to_gcloud_path)):
                upload_cmd = [export_to_gcloud_path, datastore_creds,
                              timing_log, '--parent_key',
                              datastore_parent_key]
                tko_utils.dprint('Start exporting timeline report to gcloud')
                subprocess.check_output(upload_cmd)
                tko_utils.dprint('Successfully export timeline report to '
                                 'gcloud')
            else:
                tko_utils.dprint('DEBUG: skip exporting suite timeline to '
                                 'gcloud, because either gcloud creds or '
                                 'export_to_gcloud script is not found.')
    except Exception as e:
        # Suite-report generation is best-effort; never fail the parse for it.
        tko_utils.dprint("WARNING: fail to dump/export suite report. "
                         "Error:\n%s" % e)

    # Mark GS_OFFLOADER_NO_OFFLOAD in gs_offloader_instructions at the end of
    # the function, so any failure, e.g., db connection error, will stop
    # gs_offloader_instructions being updated, and logs can be uploaded for
    # troubleshooting.
    if job_successful:
        # Check if we should not offload this test's results.
        if job_keyval.get(constants.JOB_OFFLOAD_FAILURES_KEY, False):
            # Update the gs_offloader_instructions json file.
            gs_instructions_file = os.path.join(
                    path, constants.GS_OFFLOADER_INSTRUCTIONS)
            gs_offloader_instructions = {}
            if os.path.exists(gs_instructions_file):
                with open(gs_instructions_file, 'r') as f:
                    gs_offloader_instructions = json.load(f)

            gs_offloader_instructions[constants.GS_OFFLOADER_NO_OFFLOAD] = True
            with open(gs_instructions_file, 'w') as f:
                json.dump(gs_offloader_instructions, f)
514
515
def _write_job_to_db(db, jobname, job):
    """Write all TKO data associated with a job to DB.

    This updates the job object as a side effect.

    @param db: tko.db.db_sql object.
    @param jobname: Name of the job to write.
    @param job: tko.models.job object.
    """
    db.insert_or_update_machine(job)
    db.insert_job(jobname, job)
    # Record where this job came from: a skylab task or the AFE.
    if tko_utils.is_skylab_task(jobname):
        task_source = 'skylab'
    else:
        task_source = 'afe'
    db.insert_or_update_task_reference(job, task_source)
    db.update_job_keyvals(job)
    for one_test in job.tests:
        db.insert_test(job, one_test)
534
535
Prathmesh Prabhu42a2bb42018-04-18 18:56:16 -0700536def _find_status_log_path(path):
537 if os.path.exists(os.path.join(path, "status.log")):
538 return os.path.join(path, "status.log")
539 if os.path.exists(os.path.join(path, "status")):
540 return os.path.join(path, "status")
541 return ""
542
543
Prathmesh Prabhue06c49b2018-04-18 19:01:23 -0700544def _parse_status_log(parser, job, status_log_path):
545 status_lines = open(status_log_path).readlines()
546 parser.start(job)
547 tests = parser.end(status_lines)
548
549 # parser.end can return the same object multiple times, so filter out dups
550 job.tests = []
551 already_added = set()
552 for test in tests:
553 if test not in already_added:
554 already_added.add(test)
555 job.tests.append(test)
556
557
Prathmesh Prabhuedac1ee2018-04-18 19:16:34 -0700558def _match_existing_tests(db, job):
559 """Find entries in the DB corresponding to the job's tests, update job.
560
561 @return: Any unmatched tests in the db.
562 """
563 old_job_idx = job.job_idx
564 raw_old_tests = db.select("test_idx,subdir,test", "tko_tests",
565 {"job_idx": old_job_idx})
566 if raw_old_tests:
567 old_tests = dict(((test, subdir), test_idx)
568 for test_idx, subdir, test in raw_old_tests)
569 else:
570 old_tests = {}
571
572 for test in job.tests:
573 test_idx = old_tests.pop((test.testname, test.subdir), None)
574 if test_idx is not None:
575 test.test_idx = test_idx
576 else:
577 tko_utils.dprint("! Reparse returned new test "
578 "testname=%r subdir=%r" %
579 (test.testname, test.subdir))
580 return old_tests
581
582
583def _delete_tests_from_db(db, tests):
584 for test_idx in tests.itervalues():
585 where = {'test_idx' : test_idx}
586 db.delete('tko_iteration_result', where)
587 db.delete('tko_iteration_perf_value', where)
588 db.delete('tko_iteration_attributes', where)
589 db.delete('tko_test_attributes', where)
590 db.delete('tko_test_labels_tests', {'test_id': test_idx})
591 db.delete('tko_tests', where)
592
593
jadmanski8e9c2572008-11-11 00:29:02 +0000594def _get_job_subdirs(path):
595 """
596 Returns a list of job subdirectories at path. Returns None if the test
597 is itself a job directory. Does not recurse into the subdirs.
598 """
599 # if there's a .machines file, use it to get the subdirs
jadmanski0afbb632008-06-06 21:10:57 +0000600 machine_list = os.path.join(path, ".machines")
601 if os.path.exists(machine_list):
jadmanski42fbd072009-01-30 15:07:05 +0000602 subdirs = set(line.strip() for line in file(machine_list))
603 existing_subdirs = set(subdir for subdir in subdirs
604 if os.path.exists(os.path.join(path, subdir)))
605 if len(existing_subdirs) != 0:
606 return existing_subdirs
jadmanski8e9c2572008-11-11 00:29:02 +0000607
608 # if this dir contains ONLY subdirectories, return them
609 contents = set(os.listdir(path))
610 contents.discard(".parse.lock")
611 subdirs = set(sub for sub in contents if
612 os.path.isdir(os.path.join(path, sub)))
613 if len(contents) == len(subdirs) != 0:
614 return subdirs
615
616 # this is a job directory, or something else we don't understand
617 return None
618
619
def parse_leaf_path(db, pid_file_manager, path, level, parse_options):
    """Parse a leaf path.

    @param db: database handle.
    @param pid_file_manager: pidfile.PidFileManager object.
    @param path: The path to the results to be parsed.
    @param level: Integer, level of subdirectories to include in the job name.
    @param parse_options: _ParseOptions instance.

    @returns: The job name of the parsed job, e.g. '123-chromeos-test/host1'
    """
    # The job name is the last |level| path components joined back together.
    jobname = "/".join(path.split("/")[-level:])
    try:
        db.run_with_retry(parse_one, db, pid_file_manager, jobname, path,
                          parse_options)
    except Exception as e:
        # Log and swallow: a bad leaf must not abort sibling parses.
        tko_utils.dprint("Error parsing leaf path: %s\nException:\n%s\n%s" %
                         (path, e, traceback.format_exc()))
    return jobname
mbligha48eeb22009-03-11 16:44:43 +0000640
641
Prathmesh Prabhub1241d12018-04-19 18:09:43 -0700642def parse_path(db, pid_file_manager, path, level, parse_options):
Fang Deng49822682014-10-21 16:29:22 -0700643 """Parse a path
644
645 @param db: database handle.
Prathmesh Prabhub1241d12018-04-19 18:09:43 -0700646 @param pid_file_manager: pidfile.PidFileManager object.
Fang Deng49822682014-10-21 16:29:22 -0700647 @param path: The path to the results to be parsed.
648 @param level: Integer, level of subdirectories to include in the job name.
Aviv Keshet687d2dc2016-10-20 15:41:16 -0700649 @param parse_options: _ParseOptions instance.
Fang Deng49822682014-10-21 16:29:22 -0700650
651 @returns: A set of job names of the parsed jobs.
652 set(['123-chromeos-test/host1', '123-chromeos-test/host2'])
653 """
654 processed_jobs = set()
jadmanski8e9c2572008-11-11 00:29:02 +0000655 job_subdirs = _get_job_subdirs(path)
656 if job_subdirs is not None:
mbligha48eeb22009-03-11 16:44:43 +0000657 # parse status.log in current directory, if it exists. multi-machine
658 # synchronous server side tests record output in this directory. without
659 # this check, we do not parse these results.
660 if os.path.exists(os.path.join(path, 'status.log')):
Prathmesh Prabhub1241d12018-04-19 18:09:43 -0700661 new_job = parse_leaf_path(db, pid_file_manager, path, level,
662 parse_options)
Fang Deng49822682014-10-21 16:29:22 -0700663 processed_jobs.add(new_job)
jadmanski0afbb632008-06-06 21:10:57 +0000664 # multi-machine job
jadmanski8e9c2572008-11-11 00:29:02 +0000665 for subdir in job_subdirs:
666 jobpath = os.path.join(path, subdir)
Prathmesh Prabhub1241d12018-04-19 18:09:43 -0700667 new_jobs = parse_path(db, pid_file_manager, jobpath, level + 1,
668 parse_options)
Fang Deng49822682014-10-21 16:29:22 -0700669 processed_jobs.update(new_jobs)
jadmanski0afbb632008-06-06 21:10:57 +0000670 else:
671 # single machine job
Prathmesh Prabhub1241d12018-04-19 18:09:43 -0700672 new_job = parse_leaf_path(db, pid_file_manager, path, level,
673 parse_options)
Fang Deng49822682014-10-21 16:29:22 -0700674 processed_jobs.add(new_job)
675 return processed_jobs
676
677
Prathmesh Prabhu3e319da2017-08-30 19:13:03 -0700678def _detach_from_parent_process():
679 """Allow reparenting the parse process away from caller.
680
681 When monitor_db is run via upstart, restarting the job sends SIGTERM to
682 the whole process group. This makes us immune from that.
683 """
684 if os.getpid() != os.getpgid(0):
685 os.setsid()
mblighbb7b8912006-10-08 03:59:02 +0000686
mbligh96cf0512008-04-17 15:25:38 +0000687def main():
Fang Deng49822682014-10-21 16:29:22 -0700688 """Main entrance."""
689 start_time = datetime.datetime.now()
690 # Record the processed jobs so that
691 # we can send the duration of parsing to metadata db.
692 processed_jobs = set()
693
jadmanski0afbb632008-06-06 21:10:57 +0000694 options, args = parse_args()
Prathmesh Prabhu3e319da2017-08-30 19:13:03 -0700695
696 if options.detach:
697 _detach_from_parent_process()
698
Aviv Keshet0b7bab02016-10-20 17:17:36 -0700699 parse_options = _ParseOptions(options.reparse, options.mailit,
Shuqian Zhao19e62fb2017-01-09 10:10:14 -0800700 options.dry_run, options.suite_report,
701 options.datastore_creds,
702 options.export_to_gcloud_path)
jadmanski0afbb632008-06-06 21:10:57 +0000703 results_dir = os.path.abspath(args[0])
704 assert os.path.exists(results_dir)
mbligh96cf0512008-04-17 15:25:38 +0000705
Dan Shibea26912017-07-21 12:26:10 -0700706 site_utils.SetupTsMonGlobalState('tko_parse', indirect=False,
707 short_lived=True)
708
jadmanskid5ab8c52008-12-03 16:27:07 +0000709 pid_file_manager = pidfile.PidFileManager("parser", results_dir)
mbligh96cf0512008-04-17 15:25:38 +0000710
jadmanskid5ab8c52008-12-03 16:27:07 +0000711 if options.write_pidfile:
712 pid_file_manager.open_file()
mbligh96cf0512008-04-17 15:25:38 +0000713
jadmanskid5ab8c52008-12-03 16:27:07 +0000714 try:
715 # build up the list of job dirs to parse
716 if options.singledir:
717 jobs_list = [results_dir]
718 else:
719 jobs_list = [os.path.join(results_dir, subdir)
720 for subdir in os.listdir(results_dir)]
721
722 # build up the database
723 db = tko_db.db(autocommit=False, host=options.db_host,
724 user=options.db_user, password=options.db_pass,
725 database=options.db_name)
726
727 # parse all the jobs
728 for path in jobs_list:
729 lockfile = open(os.path.join(path, ".parse.lock"), "w")
730 flags = fcntl.LOCK_EX
731 if options.noblock:
mblighdb18b0e2009-01-30 00:34:32 +0000732 flags |= fcntl.LOCK_NB
jadmanskid5ab8c52008-12-03 16:27:07 +0000733 try:
734 fcntl.flock(lockfile, flags)
735 except IOError, e:
mblighdb18b0e2009-01-30 00:34:32 +0000736 # lock is not available and nonblock has been requested
jadmanskid5ab8c52008-12-03 16:27:07 +0000737 if e.errno == errno.EWOULDBLOCK:
738 lockfile.close()
739 continue
740 else:
741 raise # something unexpected happened
742 try:
Prathmesh Prabhub1241d12018-04-19 18:09:43 -0700743 new_jobs = parse_path(db, pid_file_manager, path, options.level,
744 parse_options)
Fang Deng49822682014-10-21 16:29:22 -0700745 processed_jobs.update(new_jobs)
mbligh9e936402009-05-13 20:42:17 +0000746
jadmanskid5ab8c52008-12-03 16:27:07 +0000747 finally:
748 fcntl.flock(lockfile, fcntl.LOCK_UN)
jadmanski0afbb632008-06-06 21:10:57 +0000749 lockfile.close()
mblighe97e0e62009-05-21 01:41:58 +0000750
Dan Shib7a36ea2017-02-28 21:52:20 -0800751 except Exception as e:
jadmanskid5ab8c52008-12-03 16:27:07 +0000752 pid_file_manager.close_file(1)
753 raise
754 else:
755 pid_file_manager.close_file(0)
Dan Shibea26912017-07-21 12:26:10 -0700756 finally:
757 metrics.Flush()
Fang Deng49822682014-10-21 16:29:22 -0700758 duration_secs = (datetime.datetime.now() - start_time).total_seconds()
mbligh71d340d2008-03-05 15:51:16 +0000759
mbligh532cb272007-11-26 18:54:20 +0000760
mbligh96cf0512008-04-17 15:25:38 +0000761if __name__ == "__main__":
jadmanski0afbb632008-06-06 21:10:57 +0000762 main()