blob: 3ebd905b414d5081b4a2cfd05c6d8cd8559f8063 [file] [log] [blame]
Chris Masone24b80f12012-02-14 14:18:01 -08001#!/usr/bin/python
2#
3# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
4# Use of this source code is governed by a BSD-style license that can be
5# found in the LICENSE file.
6
7"""Tool for running suites of tests and waiting for completion.
8
9The desired test suite will be scheduled with autotest, and then
10this tool will block until the job is complete, printing a summary
11at the end. Error conditions result in exceptions.
12
13This is intended for use only with Chrome OS test suits that leverage the
14dynamic suite infrastructure in server/cros/dynamic_suite.py.
15"""
16
Prashanth B923ca262014-03-14 12:36:29 -070017import datetime as datetime_base
beeps6f02d192013-03-22 13:15:49 -070018import getpass, hashlib, logging, optparse, os, re, sys, time
Chris Masonecfa7efc2012-09-06 16:00:07 -070019from datetime import datetime
20
Chris Masone24b80f12012-02-14 14:18:01 -080021import common
Chris Masonecfa7efc2012-09-06 16:00:07 -070022
J. Richard Barnette3cbd76b2013-11-27 12:11:25 -080023from autotest_lib.client.common_lib import global_config, enum
24from autotest_lib.client.common_lib import priorities
Prashanth B923ca262014-03-14 12:36:29 -070025from autotest_lib.frontend.afe.json_rpc import proxy
J. Richard Barnette3cbd76b2013-11-27 12:11:25 -080026from autotest_lib.server import utils
Chris Masone44e4d6c2012-08-15 14:25:53 -070027from autotest_lib.server.cros.dynamic_suite import constants
Chris Masoneb4935552012-08-14 12:05:54 -070028from autotest_lib.server.cros.dynamic_suite import frontend_wrappers
29from autotest_lib.server.cros.dynamic_suite import job_status
Prashanth B923ca262014-03-14 12:36:29 -070030from autotest_lib.server.cros.dynamic_suite import reporting_utils
J. Richard Barnettee7b98bb2013-08-21 16:34:16 -070031from autotest_lib.server.cros.dynamic_suite import tools
beeps6f02d192013-03-22 13:15:49 -070032from autotest_lib.site_utils.graphite import stats
Prashanth B923ca262014-03-14 12:36:29 -070033from autotest_lib.site_utils import diagnosis_utils
Chris Masone24b80f12012-02-14 14:18:01 -080034
Chris Masone1120cdf2012-02-27 17:35:07 -080035CONFIG = global_config.global_config
36
Simran Basi22aa9fe2012-12-07 16:37:09 -080037# Return code that will be sent back to autotest_rpc_server.py
Aviv Keshet1480c4a2013-03-21 16:38:31 -070038RETURN_CODES = enum.Enum('OK', 'ERROR', 'WARNING')
Simran Basi22aa9fe2012-12-07 16:37:09 -080039
Chris Masonedfa0beba2012-03-19 11:41:47 -070040
Scott Zawalski94457b72012-07-02 18:45:07 -040041def setup_logging(logfile=None):
42 """Setup basic logging with all logging info stripped.
43
44 Calls to logging will only show the message. No severity is logged.
45
46 @param logfile: If specified dump output to a file as well.
47 """
Dan Shi8a2dddd2013-08-09 12:58:44 -070048 # Remove all existing handlers. client/common_lib/logging_config adds
49 # a StreamHandler to logger when modules are imported, e.g.,
50 # autotest_lib.client.bin.utils. A new StreamHandler will be added here to
51 # log only messages, not severity.
52 logging.getLogger().handlers = []
53
Scott Zawalski94457b72012-07-02 18:45:07 -040054 screen_handler = logging.StreamHandler()
55 screen_handler.setFormatter(logging.Formatter('%(message)s'))
56 logging.getLogger().addHandler(screen_handler)
57 logging.getLogger().setLevel(logging.INFO)
58 if logfile:
59 file_handler = logging.FileHandler(logfile)
60 file_handler.setLevel(logging.DEBUG)
61 logging.getLogger().addHandler(file_handler)
Chris Masonedfa0beba2012-03-19 11:41:47 -070062
63
Chris Masone24b80f12012-02-14 14:18:01 -080064def parse_options():
Aviv Keshet1480c4a2013-03-21 16:38:31 -070065 #pylint: disable-msg=C0111
Zdenek Behan77290c32012-06-26 17:39:47 +020066 usage = "usage: %prog [options]"
Chris Masone24b80f12012-02-14 14:18:01 -080067 parser = optparse.OptionParser(usage=usage)
68 parser.add_option("-b", "--board", dest="board")
69 parser.add_option("-i", "--build", dest="build")
Chris Masone359c0fd2012-03-13 15:18:59 -070070 # This should just be a boolean flag, but the autotest "proxy" code
71 # can't handle flags that don't take arguments.
Alex Millerab33ddb2012-10-03 12:56:02 -070072 parser.add_option("-n", "--no_wait", dest="no_wait", default="False",
73 help='Must pass "True" or "False" if used.')
Alex Miller0032e932013-10-23 12:52:58 -070074 # If you really want no pool, --pool="" will do it. USE WITH CARE.
75 parser.add_option("-p", "--pool", dest="pool", default="suites")
Chris Masone24b80f12012-02-14 14:18:01 -080076 parser.add_option("-s", "--suite_name", dest="name")
Simran Basi25effe32013-11-26 13:02:11 -080077 parser.add_option("-a", "--afe_timeout_mins", dest="afe_timeout_mins",
78 default=30)
79 parser.add_option("-t", "--timeout_mins", dest="timeout_mins",
80 default=1440)
Chris Masone8ac66712012-02-15 14:21:02 -080081 parser.add_option("-d", "--delay_sec", dest="delay_sec", default=10)
Chris Masone986459e2012-04-11 11:36:48 -070082 parser.add_option("-m", "--mock_job_id", dest="mock_job_id",
83 help="Skips running suite; creates report for given ID.")
Alex Miller05a2fff2012-09-10 10:14:34 -070084 parser.add_option("-u", "--num", dest="num", type="int", default=None,
Chris Masone8906ab12012-07-23 15:37:56 -070085 help="Run on at most NUM machines.")
Alex Millerf43d0eb2012-10-01 13:43:13 -070086 # Same boolean flag issue applies here.
Alex Millerab33ddb2012-10-03 12:56:02 -070087 parser.add_option("-f", "--file_bugs", dest="file_bugs", default='False',
88 help='File bugs on test failures. Must pass "True" or '
89 '"False" if used.')
Dan Shia02181f2013-01-29 14:03:32 -080090 parser.add_option("-l", "--bypass_labstatus", dest="bypass_labstatus",
91 action="store_true", help='Bypass lab status check.')
Alex Miller88762a82013-09-04 15:41:28 -070092 # We allow either a number or a string for the priority. This way, if you
93 # know what you're doing, one can specify a custom priority level between
94 # other levels.
95 parser.add_option("-r", "--priority", dest="priority",
96 default=priorities.Priority.DEFAULT,
97 action="store", help="Priority of suite")
Aviv Keshet7cd12312013-07-25 10:25:55 -070098 parser.add_option("--suite_args", dest="suite_args",
99 default=None, action="store",
100 help="Argument string for suite control file.")
Alex Millerf43d0eb2012-10-01 13:43:13 -0700101
Chris Masone24b80f12012-02-14 14:18:01 -0800102 options, args = parser.parse_args()
103 return parser, options, args
104
105
106def get_pretty_status(status):
Aviv Keshet1480c4a2013-03-21 16:38:31 -0700107 """
108 Converts a status string into a pretty-for-printing string.
109
110 @param status: Status to convert.
111
112 @return: Returns pretty string.
113 GOOD -> [ PASSED ]
114 TEST_NA -> [ INFO ]
115 other -> [ FAILED ]
116 """
Chris Masone24b80f12012-02-14 14:18:01 -0800117 if status == 'GOOD':
118 return '[ PASSED ]'
Chris Masone8906ab12012-07-23 15:37:56 -0700119 elif status == 'TEST_NA':
120 return '[ INFO ]'
Chris Masone24b80f12012-02-14 14:18:01 -0800121 return '[ FAILED ]'
122
Zdenek Behan150fbd62012-04-06 17:20:01 +0200123def is_fail_status(status):
Aviv Keshet1480c4a2013-03-21 16:38:31 -0700124 """
125 Check if the given status corresponds to a failure.
126
127 @param status: The status to check. (string)
128
129 @return: True if status is FAIL or ERROR. False otherwise.
130 """
Zdenek Behan150fbd62012-04-06 17:20:01 +0200131 # All the statuses tests can have when they fail.
beeps619189a2013-04-10 20:56:09 -0700132 if status in ['FAIL', 'ERROR', 'ABORT']:
Zdenek Behan150fbd62012-04-06 17:20:01 +0200133 return True
134 return False
135
Chris Masone24b80f12012-02-14 14:18:01 -0800136
beeps619189a2013-04-10 20:56:09 -0700137def get_view_info(suite_job_id, view, build, suite):
Zdenek Behan150fbd62012-04-06 17:20:01 +0200138 """
139 Parse a view for the slave job name and job_id.
140
141 @param suite_job_id: The job id of our master suite job.
142 @param view: Test result view.
beeps619189a2013-04-10 20:56:09 -0700143 @param build: build passed in via the -b option.
144 eg: lumpy-release/R28-3947.0.0
145 @param suite: suite passed in via the -s option.
146 eg: dummy
147 @return A tuple job_name, experimental, name of the slave test run
148 described by view. eg:
149 experimental_dummy_Pass fails: (1130-owner, True, dummy_Pass)
150 experimental_dummy_Pass aborts: (1130-owner, True,
151 experimental_dummy_Pass)
152 dummy_Fail: (1130-owner, False, dummy_Fail.Error)
Zdenek Behan150fbd62012-04-06 17:20:01 +0200153 """
154 # By default, we are the main suite job since there is no
155 # keyval entry for our job_name.
156 job_name = '%s-%s' % (suite_job_id, getpass.getuser())
157 experimental = False
beeps619189a2013-04-10 20:56:09 -0700158 test_name = ''
Dan Shid1521802013-05-24 13:08:37 -0700159 # raw test name is the test_name from tko status view. tko_job_keyvals may
160 # have a record of the hash of this name mapping to job_id-owner, which can
161 # be used to reference the test to its job url. The change is made to
162 # support tests in different jobs within a suite that shares the same test
163 # class, e.g., AU suite.
164 raw_test_name = view['test_name']
Zdenek Behan150fbd62012-04-06 17:20:01 +0200165 if 'job_keyvals' in view:
beeps619189a2013-04-10 20:56:09 -0700166 # For a test invocation like:
167 # NAME = "dummy_Fail"
168 # job.run_test('dummy_Fail', tag='Error', to_throw='TestError')
169 # we will:
170 # Record a keyval of the jobs test_name field: dummy_Fail
171 # On success, yield a tko status with the tagged name:
172 # dummy_Fail.Error
173 # On abort, yield a status (not from tko) with the job name:
174 # /build/suite/dummy_Fail.Error
175 # Note the last 2 options include the tag. The tag is seperated
176 # from the rest of the name with a '.'. The tag or test name can
177 # also include a /, and we must isolate the tag before we compare it
178 # to the hashed keyval. Based on this we have the following cases:
179 # 1. Regular test failure with or without a tag '.': std_job_name is
180 # set to the view test_name, after removing the tag.
181 # 2. Regular test Aborts: we know that dynamic_suite inserted a name
182 # like build/suite/test.name (eg:
183 # lumpy-release/R28-3947.0.0/dummy/dummy_Fail.Error), so we
184 # intersect the build/suite/ string we already have with the
185 # test_name in the view. The name of the aborted test is
186 # instrumental in generating the job_name, which is used in
187 # creating a link to the logs.
188 # 3. Experimental tests, Aborts and Failures: The test view
189 # corresponding to the afe_job_id of the suite job contains
190 # stubs for each test in this suite. The names of these jobs
191 # will contain an experimental prefix if they were aborted;
192 # If they failed the same names will not contain an experimental
193 # prefix but we would have hashed the name with a prefix. Eg:
194 # Test name = experimental_pass
195 # keyval contains: hash(experimental_pass)
196 # Fail/Pass view['test_name'] = pass
197 # Abort view['test_name'] = board/build/experimental_pass
198 # So we need to add the experimental prefix only if the test was
199 # aborted. Everything else is the same as [2].
200 # 4. Experimental server job failures: eg verify passes, something on
201 # the DUT crashes, the experimental server job fails to ssh in. We
202 # need to manually set the experimental flag in this case because the
203 # server job name isn't recorded in the keyvals. For a normal suite
204 # the views will contain: SERVER_JOB, try_new_image, test_name. i.e
205 # the test server jobs should be handled transparently and only the
206 # suite server job should appear in the view. If a server job fails
207 # (for an experimental test or otherwise) we insert the server job
208 # entry into the tko database instead. Put more generally we insert
209 # the last stage we knew about into the db record associated with
210 # that suites afe_job_id. This could lead to a view containing:
211 # SERVER_JOB, try_new_image,
212 # lumpy-release/R28-4008.0.0/bvt/experimental_pass_SERVER_JOB.
Alex Miller9a1987a2013-08-21 15:51:16 -0700213 # Neither of these operations will stomp on a pristine string.
Dan Shi605f7642013-11-04 16:32:54 -0800214 test_name = tools.get_test_name(build, suite, view['test_name'])
Alex Miller9a1987a2013-08-21 15:51:16 -0700215 std_job_name = test_name.split('.')[0]
Chris Masone11aae452012-05-21 16:08:39 -0700216
beeps619189a2013-04-10 20:56:09 -0700217 if (job_status.view_is_for_infrastructure_fail(view) and
218 std_job_name.startswith(constants.EXPERIMENTAL_PREFIX)):
219 experimental = True
220
221 if std_job_name.startswith(constants.EXPERIMENTAL_PREFIX):
222 exp_job_name = std_job_name
223 else:
224 exp_job_name = constants.EXPERIMENTAL_PREFIX + std_job_name
Chris Masoned9f13c52012-08-29 10:37:08 -0700225 std_job_hash = hashlib.md5(std_job_name).hexdigest()
226 exp_job_hash = hashlib.md5(exp_job_name).hexdigest()
Dan Shid1521802013-05-24 13:08:37 -0700227 raw_test_name_hash = hashlib.md5(raw_test_name).hexdigest()
Chris Masone11aae452012-05-21 16:08:39 -0700228
beeps619189a2013-04-10 20:56:09 -0700229 # In the experimental abort case both these clauses can evaluate
230 # to True.
Chris Masone11aae452012-05-21 16:08:39 -0700231 if std_job_hash in view['job_keyvals']:
232 job_name = view['job_keyvals'][std_job_hash]
beeps619189a2013-04-10 20:56:09 -0700233 if exp_job_hash in view['job_keyvals']:
Chris Masone11aae452012-05-21 16:08:39 -0700234 experimental = True
235 job_name = view['job_keyvals'][exp_job_hash]
Dan Shid1521802013-05-24 13:08:37 -0700236 if raw_test_name_hash in view['job_keyvals']:
237 job_name = view['job_keyvals'][raw_test_name_hash]
Chris Masone11aae452012-05-21 16:08:39 -0700238
beeps619189a2013-04-10 20:56:09 -0700239 # If the name being returned is the test name it needs to include the tag
240 return job_name, experimental, std_job_name if not test_name else test_name
Zdenek Behan150fbd62012-04-06 17:20:01 +0200241
242
Craig Harrison25eb0f32012-08-23 16:48:49 -0700243class LogLink(object):
J. Richard Barnetteb9c911d2013-08-23 11:24:21 -0700244 """Information needed to record a link in the logs.
Craig Harrison25eb0f32012-08-23 16:48:49 -0700245
J. Richard Barnetteb9c911d2013-08-23 11:24:21 -0700246 Depending on context and the information provided at
247 construction time, the link may point to either to log files for
248 a job, or to a bug filed for a failure in the job.
Craig Harrison25eb0f32012-08-23 16:48:49 -0700249
J. Richard Barnetteb9c911d2013-08-23 11:24:21 -0700250 @var anchor The link text.
251 @var url The link url.
252 @var bug_id Id of a bug to link to, or None.
253 """
254
255 _BUG_URL_PREFIX = CONFIG.get_config_value('BUG_REPORTING',
256 'tracker_url')
J. Richard Barnetteb9c911d2013-08-23 11:24:21 -0700257 _URL_PATTERN = CONFIG.get_config_value('CROS',
258 'log_url_pattern', type=str)
259
260
Fang Deng53c6ff52014-02-24 17:51:24 -0800261 def __init__(self, anchor, server, job_string, bug_info=None, reason=None):
J. Richard Barnetteb9c911d2013-08-23 11:24:21 -0700262 """Initialize the LogLink by generating the log URL.
263
264 @param anchor The link text.
Alex Millerc7a59522013-10-30 15:18:57 -0700265 @param server The hostname of the server this suite ran on.
J. Richard Barnetteb9c911d2013-08-23 11:24:21 -0700266 @param job_string The job whose logs we'd like to link to.
267 @param bug_info Info about the bug, if one was filed.
Fang Deng53c6ff52014-02-24 17:51:24 -0800268 @param reason A string representing the reason of failure if any.
Craig Harrison25eb0f32012-08-23 16:48:49 -0700269 """
270 self.anchor = anchor
Alex Millerc7a59522013-10-30 15:18:57 -0700271 self.url = self._URL_PATTERN % (server, job_string)
Fang Deng53c6ff52014-02-24 17:51:24 -0800272 self.reason = reason
J. Richard Barnetteb9c911d2013-08-23 11:24:21 -0700273 if bug_info:
274 self.bug_id, self.bug_count = bug_info
275 else:
276 self.bug_id = None
277 self.bug_count = None
Craig Harrison25eb0f32012-08-23 16:48:49 -0700278
279
280 def GenerateBuildbotLink(self):
J. Richard Barnetteb9c911d2013-08-23 11:24:21 -0700281 """Generate a link formatted to meet buildbot expectations.
282
283 If there is a bug associated with this link, report that;
284 otherwise report a link to the job logs.
Craig Harrison25eb0f32012-08-23 16:48:49 -0700285
286 @return A link formatted for the buildbot log annotator.
287 """
J. Richard Barnetteb9c911d2013-08-23 11:24:21 -0700288 if self.bug_id:
289 url = '%s%s' % (self._BUG_URL_PREFIX, self.bug_id)
beepsad4daf82013-09-26 10:07:33 -0700290 if self.bug_count is None:
Fang Deng53c6ff52014-02-24 17:51:24 -0800291 anchor_text = '%s (Unknown number of reports)' % (
beepsad4daf82013-09-26 10:07:33 -0700292 self.anchor.strip())
293 elif self.bug_count == 1:
Fang Deng53c6ff52014-02-24 17:51:24 -0800294 anchor_text = '%s (new)' % self.anchor.strip()
J. Richard Barnetteb9c911d2013-08-23 11:24:21 -0700295 else:
Fang Deng53c6ff52014-02-24 17:51:24 -0800296 anchor_text = '%s (%s reports)' % (
J. Richard Barnetteb9c911d2013-08-23 11:24:21 -0700297 self.anchor.strip(), self.bug_count)
298 else:
299 url = self.url
300 anchor_text = self.anchor.strip()
Fang Deng53c6ff52014-02-24 17:51:24 -0800301
302 if self.reason:
303 anchor_text = '%s - %s' % (anchor_text, self.reason)
304
305 return '@@@STEP_LINK@%s@%s@@@'% (anchor_text, url)
Craig Harrison25eb0f32012-08-23 16:48:49 -0700306
307
Craig Harrisond8451572012-08-31 10:29:33 -0700308 def GenerateTextLink(self):
J. Richard Barnetteb9c911d2013-08-23 11:24:21 -0700309 """Generate a link to the job's logs, for consumption by a human.
Craig Harrison25eb0f32012-08-23 16:48:49 -0700310
Craig Harrisond8451572012-08-31 10:29:33 -0700311 @return A link formatted for human readability.
Craig Harrison25eb0f32012-08-23 16:48:49 -0700312 """
Fang Deng53c6ff52014-02-24 17:51:24 -0800313 return '%s%s' % (self.anchor, self.url)
Craig Harrison25eb0f32012-08-23 16:48:49 -0700314
315
Chris Masoneb61b4052012-04-30 14:35:28 -0700316class Timings(object):
317 """Timings for important events during a suite.
318
319 All timestamps are datetime.datetime objects.
320
321 @var suite_start_time: the time the suite started.
Chris Masoneb61b4052012-04-30 14:35:28 -0700322 @var tests_start_time: the time the first test started running.
323 """
beeps6f02d192013-03-22 13:15:49 -0700324
325 # Recorded in create_suite_job as we're staging the components of a
326 # build on the devserver. Only the artifacts necessary to start
327 # installing images onto DUT's will be staged when we record
328 # payload_end_time, the remaining artifacts are downloaded after we kick
329 # off the reimaging job, at which point we record artifact_end_time.
Chris Masonea8066a92012-05-01 16:52:31 -0700330 download_start_time = None
331 payload_end_time = None
332 artifact_end_time = None
beeps6f02d192013-03-22 13:15:49 -0700333
334 # The test_start_time, but taken off the view that corresponds to the
335 # suite instead of an individual test.
Chris Masoneb61b4052012-04-30 14:35:28 -0700336 suite_start_time = None
beeps6f02d192013-03-22 13:15:49 -0700337
beeps6f02d192013-03-22 13:15:49 -0700338 # Earliest and Latest tests in the set of TestViews passed to us.
Chris Masoneb61b4052012-04-30 14:35:28 -0700339 tests_start_time = None
340 tests_end_time = None
341
342
beeps6f02d192013-03-22 13:15:49 -0700343 def _GetDatetime(self, timing_string, timing_string_format):
344 """
345 Formats the timing_string according to the timing_string_format.
346
347 @param timing_string: A datetime timing string.
348 @param timing_string_format: Format of the time in timing_string.
349 @return: A datetime object for the given timing string.
350 """
351 try:
352 return datetime.strptime(timing_string, timing_string_format)
353 except TypeError:
354 return None
355
356
Chris Masoned9f13c52012-08-29 10:37:08 -0700357 def RecordTiming(self, view):
358 """Given a test report view, extract and record pertinent time info.
Chris Masoneb61b4052012-04-30 14:35:28 -0700359
360 get_detailed_test_views() returns a list of entries that provide
361 info about the various parts of a suite run. This method can take
362 any one of these entries and look up timestamp info we might want
363 and record it.
364
Chris Masonecfa7efc2012-09-06 16:00:07 -0700365 If timestamps are unavailable, datetime.datetime.min/max will be used.
366
Chris Masoned9f13c52012-08-29 10:37:08 -0700367 @param view: a view dict, as returned by get_detailed_test_views().
Chris Masoneb61b4052012-04-30 14:35:28 -0700368 """
Chris Masonecfa7efc2012-09-06 16:00:07 -0700369 start_candidate = datetime.min
370 end_candidate = datetime.max
371 if view['test_started_time']:
372 start_candidate = datetime.strptime(view['test_started_time'],
373 job_status.TIME_FMT)
374 if view['test_finished_time']:
375 end_candidate = datetime.strptime(view['test_finished_time'],
376 job_status.TIME_FMT)
377
Chris Masoned9f13c52012-08-29 10:37:08 -0700378 if job_status.view_is_for_suite_prep(view):
Chris Masoneb61b4052012-04-30 14:35:28 -0700379 self.suite_start_time = start_candidate
Chris Masoneb61b4052012-04-30 14:35:28 -0700380 else:
381 self._UpdateFirstTestStartTime(start_candidate)
382 self._UpdateLastTestEndTime(end_candidate)
Chris Masoned9f13c52012-08-29 10:37:08 -0700383 if 'job_keyvals' in view:
384 keyvals = view['job_keyvals']
beeps6f02d192013-03-22 13:15:49 -0700385 self.download_start_time = self._GetDatetime(
386 keyvals.get(constants.DOWNLOAD_STARTED_TIME),
387 job_status.TIME_FMT)
388
389 self.payload_end_time = self._GetDatetime(
390 keyvals.get(constants.PAYLOAD_FINISHED_TIME),
391 job_status.TIME_FMT)
392
393 self.artifact_end_time = self._GetDatetime(
394 keyvals.get(constants.ARTIFACT_FINISHED_TIME),
395 job_status.TIME_FMT)
Chris Masone44e4d6c2012-08-15 14:25:53 -0700396
Chris Masoneb61b4052012-04-30 14:35:28 -0700397
398 def _UpdateFirstTestStartTime(self, candidate):
399 """Update self.tests_start_time, iff candidate is an earlier time.
400
401 @param candidate: a datetime.datetime object.
402 """
403 if not self.tests_start_time or candidate < self.tests_start_time:
404 self.tests_start_time = candidate
405
406
407 def _UpdateLastTestEndTime(self, candidate):
408 """Update self.tests_end_time, iff candidate is a later time.
409
410 @param candidate: a datetime.datetime object.
411 """
412 if not self.tests_end_time or candidate > self.tests_end_time:
413 self.tests_end_time = candidate
414
415
416 def __str__(self):
417 return ('\n'
418 'Suite timings:\n'
Chris Masonea8066a92012-05-01 16:52:31 -0700419 'Downloads started at %s\n'
420 'Payload downloads ended at %s\n'
Chris Masoneb61b4052012-04-30 14:35:28 -0700421 'Suite started at %s\n'
Chris Masonea8066a92012-05-01 16:52:31 -0700422 'Artifact downloads ended (at latest) at %s\n'
Chris Masoneb61b4052012-04-30 14:35:28 -0700423 'Testing started at %s\n'
Chris Masonea8066a92012-05-01 16:52:31 -0700424 'Testing ended at %s\n' % (self.download_start_time,
425 self.payload_end_time,
426 self.suite_start_time,
Chris Masonea8066a92012-05-01 16:52:31 -0700427 self.artifact_end_time,
Chris Masoneb61b4052012-04-30 14:35:28 -0700428 self.tests_start_time,
429 self.tests_end_time))
430
431
beeps6f02d192013-03-22 13:15:49 -0700432 def _GetDataKeyForStatsd(self, suite, build, board):
433 """
434 Constructs the key used for logging statsd timing data.
435
436 @param suite: scheduled suite that we want to record the results of.
437 @param build: The build string. This string should have a consistent
438 format eg: x86-mario-release/R26-3570.0.0. If the format of this
439 string changes such that we can't determine build_type or branch
440 we give up and use the parametes we're sure of instead (suite,
441 board). eg:
442 1. build = x86-alex-pgo-release/R26-3570.0.0
443 branch = 26
444 build_type = pgo-release
445 2. build = lumpy-paladin/R28-3993.0.0-rc5
446 branch = 28
447 build_type = paladin
448 @param board: The board that this suite ran on.
449 @return: The key used to log timing information in statsd.
450 """
451 try:
J. Richard Barnette3cbd76b2013-11-27 12:11:25 -0800452 _board, build_type, branch = utils.ParseBuildName(build)[:3]
453 except utils.ParseBuildNameException as e:
beeps6f02d192013-03-22 13:15:49 -0700454 logging.error(str(e))
455 branch = 'Unknown'
456 build_type = 'Unknown'
457 else:
458 embedded_str = re.search(r'x86-\w+-(.*)', _board)
459 if embedded_str:
460 build_type = embedded_str.group(1) + '-' + build_type
461
462 data_key_dict = {
463 'board': board,
464 'branch': branch,
465 'build_type': build_type,
466 'suite': suite,
467 }
468 return ('run_suite.%(board)s.%(build_type)s.%(branch)s.%(suite)s'
469 % data_key_dict)
470
471
472 def SendResultsToStatsd(self, suite, build, board):
473 """
474 Sends data to statsd.
475
476 1. Makes a data_key of the form: run_suite.$board.$branch.$suite
477 eg: stats/gauges/<hostname>/run_suite/<board>/<branch>/<suite>/
478 2. Computes timings for several start and end event pairs.
Alex Miller9a1987a2013-08-21 15:51:16 -0700479 3. Sends all timing values to statsd.
beeps6f02d192013-03-22 13:15:49 -0700480
481 @param suite: scheduled suite that we want to record the results of.
482 @param build: the build that this suite ran on.
483 eg: 'lumpy-release/R26-3570.0.0'
484 @param board: the board that this suite ran on.
485 """
486 if sys.version_info < (2, 7):
487 logging.error('Sending run_suite perf data to statsd requires'
488 'python 2.7 or greater.')
489 return
490
491 data_key = self._GetDataKeyForStatsd(suite, build, board)
492
493 # Since we don't want to try subtracting corrupted datetime values
494 # we catch TypeErrors in _GetDatetime and insert None instead. This
495 # means that even if, say, keyvals.get(constants.ARTIFACT_FINISHED_TIME)
496 # returns a corrupt value the member artifact_end_time is set to None.
497 if self.download_start_time:
498 if self.payload_end_time:
499 stats.Timer(data_key).send('payload_download_time',
500 (self.payload_end_time -
501 self.download_start_time).total_seconds())
502
503 if self.artifact_end_time:
504 stats.Timer(data_key).send('artifact_download_time',
505 (self.artifact_end_time -
506 self.download_start_time).total_seconds())
507
508 if self.tests_end_time:
509 if self.suite_start_time:
510 stats.Timer(data_key).send('suite_run_time',
511 (self.tests_end_time -
512 self.suite_start_time).total_seconds())
513
514 if self.tests_start_time:
515 stats.Timer(data_key).send('tests_run_time',
516 (self.tests_end_time -
517 self.tests_start_time).total_seconds())
518
beeps6f02d192013-03-22 13:15:49 -0700519
beeps619189a2013-04-10 20:56:09 -0700520def _full_test_name(job_id, view, build, suite):
521 """
522 Generates the full test name for printing to logs and generating a link to
523 the results.
Craig Harrisond8451572012-08-31 10:29:33 -0700524
525 @param job_id: the job id.
526 @param view: the view for which we are generating the name.
beeps619189a2013-04-10 20:56:09 -0700527 @param build: the build for this invocation of run_suite.
528 @param suite: the suite for this invocation of run_suite.
Craig Harrisond8451572012-08-31 10:29:33 -0700529 @return The test name, possibly with a descriptive prefix appended.
530 """
beeps619189a2013-04-10 20:56:09 -0700531 experimental, test_name = get_view_info(job_id, view, build, suite)[1:]
532
533 # If an experimental test is aborted get_view_info returns a name which
534 # includes the prefix.
535 prefix = constants.EXPERIMENTAL_PREFIX if (experimental and
536 not test_name.startswith(constants.EXPERIMENTAL_PREFIX)) else ''
537 return prefix + test_name
Craig Harrisond8451572012-08-31 10:29:33 -0700538
539
Alex Millerc7a59522013-10-30 15:18:57 -0700540_DEFAULT_AUTOTEST_INSTANCE = CONFIG.get_config_value(
541 'SERVER', 'hostname', type=str)
542
543
544def instance_for_pool(pool_name):
545 """
546 Return the hostname of the server that should be used to service a suite
547 for the specified pool.
548
549 @param pool_name: The pool (without 'pool:' to schedule the suite against.
550 @return: The correct host that should be used to service this suite run.
551 """
552 return CONFIG.get_config_value(
553 'POOL_INSTANCE_SHARDING', pool_name,
554 default=_DEFAULT_AUTOTEST_INSTANCE)
555
556
Chris Masone24b80f12012-02-14 14:18:01 -0800557def main():
Aviv Keshet1480c4a2013-03-21 16:38:31 -0700558 """
559 Entry point for run_suite script.
560 """
Chris Masone24b80f12012-02-14 14:18:01 -0800561 parser, options, args = parse_options()
Chris Masone3a850642012-07-11 11:11:18 -0700562 log_name = 'run_suite-default.log'
Chris Masone986459e2012-04-11 11:36:48 -0700563 if not options.mock_job_id:
Zdenek Behan77290c32012-06-26 17:39:47 +0200564 if args:
565 print 'Unknown arguments: ' + str(args)
566 parser.print_help()
567 return
568 if not options.build:
569 print 'Need to specify which build to use'
570 parser.print_help()
571 return
572 if not options.board:
573 print 'Need to specify board'
574 parser.print_help()
575 return
576 if not options.name:
577 print 'Need to specify suite name'
Chris Masone986459e2012-04-11 11:36:48 -0700578 parser.print_help()
579 return
Chris Masone3a850642012-07-11 11:11:18 -0700580 # convert build name from containing / to containing only _
581 log_name = 'run_suite-%s.log' % options.build.replace('/', '_')
582 log_dir = os.path.join(common.autotest_dir, 'logs')
583 if os.path.exists(log_dir):
584 log_name = os.path.join(log_dir, log_name)
Alex Miller8e75d0d2012-07-31 15:13:32 -0700585 if options.num is not None and options.num < 1:
586 print 'Number of machines must be more than 0, if specified.'
587 parser.print_help()
588 return
Alex Millerab33ddb2012-10-03 12:56:02 -0700589 if options.no_wait != 'True' and options.no_wait != 'False':
590 print 'Please specify "True" or "False" for --no_wait.'
591 parser.print_help()
592 return
593 if options.file_bugs != 'True' and options.file_bugs != 'False':
594 print 'Please specify "True" or "False" for --file_bugs.'
595 parser.print_help()
596 return
Alex Miller88762a82013-09-04 15:41:28 -0700597
598 try:
599 priority = int(options.priority)
600 except ValueError:
601 try:
602 priority = priorities.Priority.get_value(options.priority)
603 except AttributeError:
604 print 'Unknown priority level %s. Try one of %s.' % (
605 options.priority, ', '.join(priorities.Priority.names))
606
Scott Zawalski94457b72012-07-02 18:45:07 -0400607 setup_logging(logfile=log_name)
Chris Masonedfa0beba2012-03-19 11:41:47 -0700608
Simran Basi22aa9fe2012-12-07 16:37:09 -0800609 try:
Dan Shia02181f2013-01-29 14:03:32 -0800610 if not options.bypass_labstatus:
J. Richard Barnetteabbe0962013-12-10 18:15:44 -0800611 utils.check_lab_status(options.build)
612 except utils.TestLabException as e:
Alex Millerc3262f02013-11-15 11:27:19 -0800613 logging.warning('Error Message: %s', e)
Aviv Keshet1480c4a2013-03-21 16:38:31 -0700614 return RETURN_CODES.WARNING
Simran Basi22aa9fe2012-12-07 16:37:09 -0800615
Alex Millerc7a59522013-10-30 15:18:57 -0700616 instance_server = instance_for_pool(options.pool)
617 afe = frontend_wrappers.RetryingAFE(server=instance_server,
Simran Basi25effe32013-11-26 13:02:11 -0800618 timeout_min=options.afe_timeout_mins,
Chris Masone8ac66712012-02-15 14:21:02 -0800619 delay_sec=options.delay_sec)
Alex Millerc7a59522013-10-30 15:18:57 -0700620 logging.info('Autotest instance: %s', instance_server)
Chris Masone359c0fd2012-03-13 15:18:59 -0700621
Dan Shi95122412013-11-12 16:20:33 -0800622 wait = options.no_wait == 'False'
623 file_bugs = options.file_bugs == 'True'
Prashanth B923ca262014-03-14 12:36:29 -0700624 logging.info('%s Submitted create_suite_job rpc',
625 diagnosis_utils.JobTimer.format_time(datetime.now()))
Chris Masone986459e2012-04-11 11:36:48 -0700626 if options.mock_job_id:
627 job_id = int(options.mock_job_id)
628 else:
Alex Miller8e75d0d2012-07-31 15:13:32 -0700629 job_id = afe.run('create_suite_job', suite_name=options.name,
630 board=options.board, build=options.build,
Alex Millerf43d0eb2012-10-01 13:43:13 -0700631 check_hosts=wait, pool=options.pool, num=options.num,
Aviv Keshet7cd12312013-07-25 10:25:55 -0700632 file_bugs=file_bugs, priority=priority,
Dan Shi95122412013-11-12 16:20:33 -0800633 suite_args=options.suite_args,
Simran Basi25effe32013-11-26 13:02:11 -0800634 wait_for_results=wait,
635 timeout_mins=options.timeout_mins)
Prashanth B923ca262014-03-14 12:36:29 -0700636 job_timer = diagnosis_utils.JobTimer(
637 time.time(), float(options.timeout_mins))
638 logging.info('%s Created suite job: %s',
639 job_timer.format_time(job_timer.job_created_time),
640 reporting_utils.link_job(
641 job_id, instance_server=instance_server))
642
Alex Millerc7a59522013-10-30 15:18:57 -0700643 TKO = frontend_wrappers.RetryingTKO(server=instance_server,
Simran Basi25effe32013-11-26 13:02:11 -0800644 timeout_min=options.afe_timeout_mins,
Chris Masone8ac66712012-02-15 14:21:02 -0800645 delay_sec=options.delay_sec)
Aviv Keshet1480c4a2013-03-21 16:38:31 -0700646 code = RETURN_CODES.OK
Prashanth B923ca262014-03-14 12:36:29 -0700647 rpc_helper = diagnosis_utils.RPCHelper(afe)
J. Richard Barnette712eb402013-08-13 18:03:00 -0700648 if wait:
649 while not afe.get_jobs(id=job_id, finished=True):
Prashanth B923ca262014-03-14 12:36:29 -0700650 # Note that this call logs output, preventing buildbot's
651 # 9000 second silent timeout from kicking in. Let there be no
652 # doubt, this is a hack. The timeout is from upstream buildbot and
653 # this is the easiest work around.
654 if job_timer.first_past_halftime():
655 rpc_helper.diagnose_job(job_id)
656 logging.info('The suite job has another %s till timeout \n',
657 job_timer.timeout_hours - job_timer.elapsed_time())
Alex Miller764227d2013-11-15 10:28:56 -0800658 time.sleep(10)
J. Richard Barnette712eb402013-08-13 18:03:00 -0700659
Scott Zawalski0acfe112012-03-06 09:21:44 -0500660 views = TKO.run('get_detailed_test_views', afe_job_id=job_id)
Dan Shif327b912013-07-18 15:35:05 -0700661 # The intended behavior is to refrain from recording stats if the suite
662 # was aborted (either by a user or through the golo rpc). Since all the
663 # views associated with the afe_job_id of the suite contain the keyvals
664 # of the suite and not the individual tests themselves, we can achieve
665 # this without digging through the views.
666 is_aborted = any([view['job_keyvals'].get('aborted_by')
667 for view in views])
Dan Shi76af8022013-10-19 01:59:49 -0700668 # For hostless job in Starting status, there is no test view associated.
669 # This can happen when a suite job in Starting status is aborted. When
670 # the scheduler hits some limit, e.g., max_hostless_jobs_per_drone,
671 # max_jobs_started_per_cycle, a suite job can stays in Starting status.
672 if not views:
673 code = RETURN_CODES.ERROR
674 returnmessage = RETURN_CODES.get_string(code)
675 logging.info('\nNo test view was found.\n'
676 'Will return from run_suite with status: %s',
677 returnmessage)
678 return code
679
beeps619189a2013-04-10 20:56:09 -0700680 width = max((len(_full_test_name(job_id, view, options.build,
Dan Shi76af8022013-10-19 01:59:49 -0700681 options.name)) for view in views)) + 3
Scott Zawalski0acfe112012-03-06 09:21:44 -0500682
Chris Masoned9f13c52012-08-29 10:37:08 -0700683 relevant_views = filter(job_status.view_is_relevant, views)
Scott Zawalski0acfe112012-03-06 09:21:44 -0500684 if not relevant_views:
Zdenek Behan150fbd62012-04-06 17:20:01 +0200685 # The main suite job most likely failed in SERVER_JOB.
686 relevant_views = views
Scott Zawalski0acfe112012-03-06 09:21:44 -0500687
Chris Masoneb61b4052012-04-30 14:35:28 -0700688 timings = Timings()
Craig Harrison25eb0f32012-08-23 16:48:49 -0700689 web_links = []
690 buildbot_links = []
Chris Masoned9f13c52012-08-29 10:37:08 -0700691 for view in relevant_views:
692 timings.RecordTiming(view)
693 if job_status.view_is_for_suite_prep(view):
694 view['test_name'] = 'Suite prep'
Chris Masone3a850642012-07-11 11:11:18 -0700695
beeps619189a2013-04-10 20:56:09 -0700696 job_name, experimental = get_view_info(job_id, view, options.build,
697 options.name)[:2]
698 test_view = _full_test_name(job_id, view, options.build,
699 options.name).ljust(width)
Chris Masoned9f13c52012-08-29 10:37:08 -0700700 logging.info("%s%s", test_view, get_pretty_status(view['status']))
beeps8ead53c2013-04-26 19:12:46 -0700701
J. Richard Barnettee7b98bb2013-08-21 16:34:16 -0700702 # It's important that we use the test name in the view
703 # and not the name returned by full_test_name, as this
704 # was the name inserted after the test ran, e.g. for an
705 # aborted test full_test_name will return
706 # 'experimental_testname' but the view and the bug_id
707 # keyval will use '/build/suite/experimental_testname'.
J. Richard Barnetteb9c911d2013-08-23 11:24:21 -0700708 bug_info = tools.get_test_failure_bug_info(
J. Richard Barnettee7b98bb2013-08-21 16:34:16 -0700709 view['job_keyvals'], view['test_name'])
beeps8ead53c2013-04-26 19:12:46 -0700710
Fang Deng53c6ff52014-02-24 17:51:24 -0800711 link = LogLink(test_view, instance_server, job_name,
712 bug_info)
Craig Harrison25eb0f32012-08-23 16:48:49 -0700713 web_links.append(link)
Chris Masoned9f13c52012-08-29 10:37:08 -0700714
J. Richard Barnette02f815e2013-08-22 16:28:32 -0700715 # Don't show links on the buildbot waterfall for tests with
716 # GOOD status.
Chris Masoned9f13c52012-08-29 10:37:08 -0700717 if view['status'] != 'GOOD':
718 logging.info("%s %s: %s", test_view, view['status'],
719 view['reason'])
Fang Deng53c6ff52014-02-24 17:51:24 -0800720 link.reason = '%s: %s' % (view['status'], view['reason'])
Chris Masone8906ab12012-07-23 15:37:56 -0700721 if view['status'] == 'TEST_NA':
722 # Didn't run; nothing to do here!
723 continue
J. Richard Barnette02f815e2013-08-22 16:28:32 -0700724 buildbot_links.append(link)
Aviv Keshet1480c4a2013-03-21 16:38:31 -0700725 if code == RETURN_CODES.ERROR:
Zdenek Behan150fbd62012-04-06 17:20:01 +0200726 # Failed already, no need to worry further.
727 continue
beeps619189a2013-04-10 20:56:09 -0700728
729 # Any non experimental test that has a status other than WARN
730 # or GOOD will result in the tree closing. Experimental tests
731 # will not close the tree, even if they have been aborted.
beepsb8117ff2013-10-01 13:50:07 -0700732 if not experimental:
733 if view['status'] == 'WARN':
734 code = RETURN_CODES.WARNING
735 elif is_fail_status(view['status']):
736 code = RETURN_CODES.ERROR
Aviv Keshet1480c4a2013-03-21 16:38:31 -0700737
Dan Shif327b912013-07-18 15:35:05 -0700738 # Do not record stats for aborted suites.
Fang Deng1cf62342013-11-22 10:06:32 -0800739 if not is_aborted and not options.mock_job_id:
Dan Shif327b912013-07-18 15:35:05 -0700740 timings.SendResultsToStatsd(options.name, options.build,
741 options.board)
Scott Zawalski94457b72012-07-02 18:45:07 -0400742 logging.info(timings)
Craig Harrison25eb0f32012-08-23 16:48:49 -0700743 logging.info('\n'
744 'Links to test logs:')
745 for link in web_links:
Craig Harrisond8451572012-08-31 10:29:33 -0700746 logging.info(link.GenerateTextLink())
Aviv Keshet1480c4a2013-03-21 16:38:31 -0700747
748 try:
749 returnmessage = RETURN_CODES.get_string(code)
750 except ValueError:
751 returnmessage = 'UNKNOWN'
752 logging.info('\n'
753 'Will return from run_suite with status: %s',
754 returnmessage)
755
Prashanth B923ca262014-03-14 12:36:29 -0700756 # There is a minor race condition here where we might have aborted for
757 # some reason other than a timeout, and the job_timer thinks it's a
758 # timeout because of the jitter in waiting for results. This shouldn't
759 # harm us since all diagnose_pool does is log information about a pool.
760 if job_timer.is_suite_timeout():
761 logging.info('\nAttempting to diagnose pool: %s', options.pool)
762 try:
763 # Add some jitter to make up for any latency in
764 # aborting the suite or checking for results.
765 cutoff = (job_timer.timeout_hours +
766 datetime_base.timedelta(hours=0.3))
767 rpc_helper.diagnose_pool(
768 options.board, options.pool, cutoff)
769 except proxy.JSONRPCException as e:
770 logging.warning('Unable to diagnose suite abort.')
771
Craig Harrison25eb0f32012-08-23 16:48:49 -0700772 logging.info('\n'
773 'Output below this line is for buildbot consumption:')
774 for link in buildbot_links:
775 logging.info(link.GenerateBuildbotLink())
Chris Masoned5939fe2012-03-13 10:11:06 -0700776 else:
Scott Zawalski94457b72012-07-02 18:45:07 -0400777 logging.info('Created suite job: %r', job_id)
Alex Millera05498f2013-11-01 16:16:21 -0700778 link = LogLink(options.name, instance_server,
779 '%s-%s' % (job_id, getpass.getuser()))
Craig Harrison25eb0f32012-08-23 16:48:49 -0700780 logging.info(link.GenerateBuildbotLink())
Scott Zawalski94457b72012-07-02 18:45:07 -0400781 logging.info('--no_wait specified; Exiting.')
Chris Masone24b80f12012-02-14 14:18:01 -0800782 return code
783
784if __name__ == "__main__":
785 sys.exit(main())