blob: 44a21f172fe7fa1b7300d396dbcaced4bd76dbd3 [file] [log] [blame]
Chris Masone24b80f12012-02-14 14:18:01 -08001#!/usr/bin/python
2#
3# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
4# Use of this source code is governed by a BSD-style license that can be
5# found in the LICENSE file.
6
7"""Tool for running suites of tests and waiting for completion.
8
9The desired test suite will be scheduled with autotest, and then
10this tool will block until the job is complete, printing a summary
11at the end. Error conditions result in exceptions.
12
13This is intended for use only with Chrome OS test suits that leverage the
14dynamic suite infrastructure in server/cros/dynamic_suite.py.
15"""
16
beeps6f02d192013-03-22 13:15:49 -070017import getpass, hashlib, logging, optparse, os, re, sys, time
Chris Masonecfa7efc2012-09-06 16:00:07 -070018from datetime import datetime
19
Chris Masone24b80f12012-02-14 14:18:01 -080020import common
Chris Masonecfa7efc2012-09-06 16:00:07 -070021
Aviv Keshet1480c4a2013-03-21 16:38:31 -070022from autotest_lib.client.common_lib import global_config, error, utils, enum
Alex Miller88762a82013-09-04 15:41:28 -070023from autotest_lib.client.common_lib import site_utils, priorities
Chris Masone44e4d6c2012-08-15 14:25:53 -070024from autotest_lib.server.cros.dynamic_suite import constants
Chris Masoneb4935552012-08-14 12:05:54 -070025from autotest_lib.server.cros.dynamic_suite import frontend_wrappers
26from autotest_lib.server.cros.dynamic_suite import job_status
J. Richard Barnettee7b98bb2013-08-21 16:34:16 -070027from autotest_lib.server.cros.dynamic_suite import tools
beeps6f02d192013-03-22 13:15:49 -070028from autotest_lib.site_utils.graphite import stats
Chris Masone24b80f12012-02-14 14:18:01 -080029
Chris Masone1120cdf2012-02-27 17:35:07 -080030CONFIG = global_config.global_config
31
Simran Basi22aa9fe2012-12-07 16:37:09 -080032# Return code that will be sent back to autotest_rpc_server.py
Aviv Keshet1480c4a2013-03-21 16:38:31 -070033RETURN_CODES = enum.Enum('OK', 'ERROR', 'WARNING')
Simran Basi22aa9fe2012-12-07 16:37:09 -080034
Chris Masonedfa0beba2012-03-19 11:41:47 -070035
Scott Zawalski94457b72012-07-02 18:45:07 -040036def setup_logging(logfile=None):
37 """Setup basic logging with all logging info stripped.
38
39 Calls to logging will only show the message. No severity is logged.
40
41 @param logfile: If specified dump output to a file as well.
42 """
Dan Shi8a2dddd2013-08-09 12:58:44 -070043 # Remove all existing handlers. client/common_lib/logging_config adds
44 # a StreamHandler to logger when modules are imported, e.g.,
45 # autotest_lib.client.bin.utils. A new StreamHandler will be added here to
46 # log only messages, not severity.
47 logging.getLogger().handlers = []
48
Scott Zawalski94457b72012-07-02 18:45:07 -040049 screen_handler = logging.StreamHandler()
50 screen_handler.setFormatter(logging.Formatter('%(message)s'))
51 logging.getLogger().addHandler(screen_handler)
52 logging.getLogger().setLevel(logging.INFO)
53 if logfile:
54 file_handler = logging.FileHandler(logfile)
55 file_handler.setLevel(logging.DEBUG)
56 logging.getLogger().addHandler(file_handler)
Chris Masonedfa0beba2012-03-19 11:41:47 -070057
58
Chris Masone24b80f12012-02-14 14:18:01 -080059def parse_options():
Aviv Keshet1480c4a2013-03-21 16:38:31 -070060 #pylint: disable-msg=C0111
Zdenek Behan77290c32012-06-26 17:39:47 +020061 usage = "usage: %prog [options]"
Chris Masone24b80f12012-02-14 14:18:01 -080062 parser = optparse.OptionParser(usage=usage)
63 parser.add_option("-b", "--board", dest="board")
64 parser.add_option("-i", "--build", dest="build")
Chris Masone359c0fd2012-03-13 15:18:59 -070065 # This should just be a boolean flag, but the autotest "proxy" code
66 # can't handle flags that don't take arguments.
Alex Millerab33ddb2012-10-03 12:56:02 -070067 parser.add_option("-n", "--no_wait", dest="no_wait", default="False",
68 help='Must pass "True" or "False" if used.')
Alex Miller86ed9792013-11-05 07:54:51 -080069 parser.add_option("-p", "--pool", dest="pool", default="")
Chris Masone24b80f12012-02-14 14:18:01 -080070 parser.add_option("-s", "--suite_name", dest="name")
Chris Masone8ac66712012-02-15 14:21:02 -080071 parser.add_option("-t", "--timeout_min", dest="timeout_min", default=30)
72 parser.add_option("-d", "--delay_sec", dest="delay_sec", default=10)
Chris Masone986459e2012-04-11 11:36:48 -070073 parser.add_option("-m", "--mock_job_id", dest="mock_job_id",
74 help="Skips running suite; creates report for given ID.")
Alex Miller05a2fff2012-09-10 10:14:34 -070075 parser.add_option("-u", "--num", dest="num", type="int", default=None,
Chris Masone8906ab12012-07-23 15:37:56 -070076 help="Run on at most NUM machines.")
Alex Millerf43d0eb2012-10-01 13:43:13 -070077 # Same boolean flag issue applies here.
Alex Millerab33ddb2012-10-03 12:56:02 -070078 parser.add_option("-f", "--file_bugs", dest="file_bugs", default='False',
79 help='File bugs on test failures. Must pass "True" or '
80 '"False" if used.')
Dan Shia02181f2013-01-29 14:03:32 -080081 parser.add_option("-l", "--bypass_labstatus", dest="bypass_labstatus",
82 action="store_true", help='Bypass lab status check.')
Alex Miller88762a82013-09-04 15:41:28 -070083 # We allow either a number or a string for the priority. This way, if you
84 # know what you're doing, one can specify a custom priority level between
85 # other levels.
86 parser.add_option("-r", "--priority", dest="priority",
87 default=priorities.Priority.DEFAULT,
88 action="store", help="Priority of suite")
Aviv Keshet7cd12312013-07-25 10:25:55 -070089 parser.add_option("--suite_args", dest="suite_args",
90 default=None, action="store",
91 help="Argument string for suite control file.")
Alex Millerf43d0eb2012-10-01 13:43:13 -070092
Chris Masone24b80f12012-02-14 14:18:01 -080093 options, args = parser.parse_args()
94 return parser, options, args
95
96
97def get_pretty_status(status):
Aviv Keshet1480c4a2013-03-21 16:38:31 -070098 """
99 Converts a status string into a pretty-for-printing string.
100
101 @param status: Status to convert.
102
103 @return: Returns pretty string.
104 GOOD -> [ PASSED ]
105 TEST_NA -> [ INFO ]
106 other -> [ FAILED ]
107 """
Chris Masone24b80f12012-02-14 14:18:01 -0800108 if status == 'GOOD':
109 return '[ PASSED ]'
Chris Masone8906ab12012-07-23 15:37:56 -0700110 elif status == 'TEST_NA':
111 return '[ INFO ]'
Chris Masone24b80f12012-02-14 14:18:01 -0800112 return '[ FAILED ]'
113
Zdenek Behan150fbd62012-04-06 17:20:01 +0200114def is_fail_status(status):
Aviv Keshet1480c4a2013-03-21 16:38:31 -0700115 """
116 Check if the given status corresponds to a failure.
117
118 @param status: The status to check. (string)
119
120 @return: True if status is FAIL or ERROR. False otherwise.
121 """
Zdenek Behan150fbd62012-04-06 17:20:01 +0200122 # All the statuses tests can have when they fail.
beeps619189a2013-04-10 20:56:09 -0700123 if status in ['FAIL', 'ERROR', 'ABORT']:
Zdenek Behan150fbd62012-04-06 17:20:01 +0200124 return True
125 return False
126
Chris Masone24b80f12012-02-14 14:18:01 -0800127
beeps619189a2013-04-10 20:56:09 -0700128def get_view_info(suite_job_id, view, build, suite):
Zdenek Behan150fbd62012-04-06 17:20:01 +0200129 """
130 Parse a view for the slave job name and job_id.
131
132 @param suite_job_id: The job id of our master suite job.
133 @param view: Test result view.
beeps619189a2013-04-10 20:56:09 -0700134 @param build: build passed in via the -b option.
135 eg: lumpy-release/R28-3947.0.0
136 @param suite: suite passed in via the -s option.
137 eg: dummy
138 @return A tuple job_name, experimental, name of the slave test run
139 described by view. eg:
140 experimental_dummy_Pass fails: (1130-owner, True, dummy_Pass)
141 experimental_dummy_Pass aborts: (1130-owner, True,
142 experimental_dummy_Pass)
143 dummy_Fail: (1130-owner, False, dummy_Fail.Error)
Zdenek Behan150fbd62012-04-06 17:20:01 +0200144 """
145 # By default, we are the main suite job since there is no
146 # keyval entry for our job_name.
147 job_name = '%s-%s' % (suite_job_id, getpass.getuser())
148 experimental = False
beeps619189a2013-04-10 20:56:09 -0700149 test_name = ''
Dan Shid1521802013-05-24 13:08:37 -0700150 # raw test name is the test_name from tko status view. tko_job_keyvals may
151 # have a record of the hash of this name mapping to job_id-owner, which can
152 # be used to reference the test to its job url. The change is made to
153 # support tests in different jobs within a suite that shares the same test
154 # class, e.g., AU suite.
155 raw_test_name = view['test_name']
Zdenek Behan150fbd62012-04-06 17:20:01 +0200156 if 'job_keyvals' in view:
beeps619189a2013-04-10 20:56:09 -0700157 # For a test invocation like:
158 # NAME = "dummy_Fail"
159 # job.run_test('dummy_Fail', tag='Error', to_throw='TestError')
160 # we will:
161 # Record a keyval of the jobs test_name field: dummy_Fail
162 # On success, yield a tko status with the tagged name:
163 # dummy_Fail.Error
164 # On abort, yield a status (not from tko) with the job name:
165 # /build/suite/dummy_Fail.Error
166 # Note the last 2 options include the tag. The tag is seperated
167 # from the rest of the name with a '.'. The tag or test name can
168 # also include a /, and we must isolate the tag before we compare it
169 # to the hashed keyval. Based on this we have the following cases:
170 # 1. Regular test failure with or without a tag '.': std_job_name is
171 # set to the view test_name, after removing the tag.
172 # 2. Regular test Aborts: we know that dynamic_suite inserted a name
173 # like build/suite/test.name (eg:
174 # lumpy-release/R28-3947.0.0/dummy/dummy_Fail.Error), so we
175 # intersect the build/suite/ string we already have with the
176 # test_name in the view. The name of the aborted test is
177 # instrumental in generating the job_name, which is used in
178 # creating a link to the logs.
179 # 3. Experimental tests, Aborts and Failures: The test view
180 # corresponding to the afe_job_id of the suite job contains
181 # stubs for each test in this suite. The names of these jobs
182 # will contain an experimental prefix if they were aborted;
183 # If they failed the same names will not contain an experimental
184 # prefix but we would have hashed the name with a prefix. Eg:
185 # Test name = experimental_pass
186 # keyval contains: hash(experimental_pass)
187 # Fail/Pass view['test_name'] = pass
188 # Abort view['test_name'] = board/build/experimental_pass
189 # So we need to add the experimental prefix only if the test was
190 # aborted. Everything else is the same as [2].
191 # 4. Experimental server job failures: eg verify passes, something on
192 # the DUT crashes, the experimental server job fails to ssh in. We
193 # need to manually set the experimental flag in this case because the
194 # server job name isn't recorded in the keyvals. For a normal suite
195 # the views will contain: SERVER_JOB, try_new_image, test_name. i.e
196 # the test server jobs should be handled transparently and only the
197 # suite server job should appear in the view. If a server job fails
198 # (for an experimental test or otherwise) we insert the server job
199 # entry into the tko database instead. Put more generally we insert
200 # the last stage we knew about into the db record associated with
201 # that suites afe_job_id. This could lead to a view containing:
202 # SERVER_JOB, try_new_image,
203 # lumpy-release/R28-4008.0.0/bvt/experimental_pass_SERVER_JOB.
Alex Miller9a1987a2013-08-21 15:51:16 -0700204 # Neither of these operations will stomp on a pristine string.
205 test_name = view['test_name'].replace('%s/%s/'% (build, suite), '')
206 std_job_name = test_name.split('.')[0]
Chris Masone11aae452012-05-21 16:08:39 -0700207
beeps619189a2013-04-10 20:56:09 -0700208 if (job_status.view_is_for_infrastructure_fail(view) and
209 std_job_name.startswith(constants.EXPERIMENTAL_PREFIX)):
210 experimental = True
211
212 if std_job_name.startswith(constants.EXPERIMENTAL_PREFIX):
213 exp_job_name = std_job_name
214 else:
215 exp_job_name = constants.EXPERIMENTAL_PREFIX + std_job_name
Chris Masoned9f13c52012-08-29 10:37:08 -0700216 std_job_hash = hashlib.md5(std_job_name).hexdigest()
217 exp_job_hash = hashlib.md5(exp_job_name).hexdigest()
Dan Shid1521802013-05-24 13:08:37 -0700218 raw_test_name_hash = hashlib.md5(raw_test_name).hexdigest()
Chris Masone11aae452012-05-21 16:08:39 -0700219
beeps619189a2013-04-10 20:56:09 -0700220 # In the experimental abort case both these clauses can evaluate
221 # to True.
Chris Masone11aae452012-05-21 16:08:39 -0700222 if std_job_hash in view['job_keyvals']:
223 job_name = view['job_keyvals'][std_job_hash]
beeps619189a2013-04-10 20:56:09 -0700224 if exp_job_hash in view['job_keyvals']:
Chris Masone11aae452012-05-21 16:08:39 -0700225 experimental = True
226 job_name = view['job_keyvals'][exp_job_hash]
Dan Shid1521802013-05-24 13:08:37 -0700227 if raw_test_name_hash in view['job_keyvals']:
228 job_name = view['job_keyvals'][raw_test_name_hash]
Chris Masone11aae452012-05-21 16:08:39 -0700229
beeps619189a2013-04-10 20:56:09 -0700230 # If the name being returned is the test name it needs to include the tag
231 return job_name, experimental, std_job_name if not test_name else test_name
Zdenek Behan150fbd62012-04-06 17:20:01 +0200232
233
Craig Harrison25eb0f32012-08-23 16:48:49 -0700234class LogLink(object):
J. Richard Barnetteb9c911d2013-08-23 11:24:21 -0700235 """Information needed to record a link in the logs.
Craig Harrison25eb0f32012-08-23 16:48:49 -0700236
J. Richard Barnetteb9c911d2013-08-23 11:24:21 -0700237 Depending on context and the information provided at
238 construction time, the link may point to either to log files for
239 a job, or to a bug filed for a failure in the job.
Craig Harrison25eb0f32012-08-23 16:48:49 -0700240
J. Richard Barnetteb9c911d2013-08-23 11:24:21 -0700241 @var anchor The link text.
242 @var url The link url.
243 @var bug_id Id of a bug to link to, or None.
244 """
245
246 _BUG_URL_PREFIX = CONFIG.get_config_value('BUG_REPORTING',
247 'tracker_url')
J. Richard Barnetteb9c911d2013-08-23 11:24:21 -0700248 _URL_PATTERN = CONFIG.get_config_value('CROS',
249 'log_url_pattern', type=str)
250
251
Alex Millerc7a59522013-10-30 15:18:57 -0700252 def __init__(self, anchor, server, job_string, bug_info=None):
J. Richard Barnetteb9c911d2013-08-23 11:24:21 -0700253 """Initialize the LogLink by generating the log URL.
254
255 @param anchor The link text.
Alex Millerc7a59522013-10-30 15:18:57 -0700256 @param server The hostname of the server this suite ran on.
J. Richard Barnetteb9c911d2013-08-23 11:24:21 -0700257 @param job_string The job whose logs we'd like to link to.
258 @param bug_info Info about the bug, if one was filed.
Craig Harrison25eb0f32012-08-23 16:48:49 -0700259 """
260 self.anchor = anchor
Alex Millerc7a59522013-10-30 15:18:57 -0700261 self.url = self._URL_PATTERN % (server, job_string)
J. Richard Barnetteb9c911d2013-08-23 11:24:21 -0700262 if bug_info:
263 self.bug_id, self.bug_count = bug_info
264 else:
265 self.bug_id = None
266 self.bug_count = None
Craig Harrison25eb0f32012-08-23 16:48:49 -0700267
268
269 def GenerateBuildbotLink(self):
J. Richard Barnetteb9c911d2013-08-23 11:24:21 -0700270 """Generate a link formatted to meet buildbot expectations.
271
272 If there is a bug associated with this link, report that;
273 otherwise report a link to the job logs.
Craig Harrison25eb0f32012-08-23 16:48:49 -0700274
275 @return A link formatted for the buildbot log annotator.
276 """
J. Richard Barnetteb9c911d2013-08-23 11:24:21 -0700277 if self.bug_id:
278 url = '%s%s' % (self._BUG_URL_PREFIX, self.bug_id)
beepsad4daf82013-09-26 10:07:33 -0700279 if self.bug_count is None:
280 anchor_text = "%s (Unknown number of reports)" % (
281 self.anchor.strip())
282 elif self.bug_count == 1:
J. Richard Barnetteb9c911d2013-08-23 11:24:21 -0700283 anchor_text = "%s (new)" % self.anchor.strip()
284 else:
285 anchor_text = "%s (%s reports)" % (
286 self.anchor.strip(), self.bug_count)
287 else:
288 url = self.url
289 anchor_text = self.anchor.strip()
290 return "@@@STEP_LINK@%s@%s@@@"% (anchor_text, url)
Craig Harrison25eb0f32012-08-23 16:48:49 -0700291
292
Craig Harrisond8451572012-08-31 10:29:33 -0700293 def GenerateTextLink(self):
J. Richard Barnetteb9c911d2013-08-23 11:24:21 -0700294 """Generate a link to the job's logs, for consumption by a human.
Craig Harrison25eb0f32012-08-23 16:48:49 -0700295
Craig Harrisond8451572012-08-31 10:29:33 -0700296 @return A link formatted for human readability.
Craig Harrison25eb0f32012-08-23 16:48:49 -0700297 """
Craig Harrisond8451572012-08-31 10:29:33 -0700298 return "%s%s" % (self.anchor, self.url)
Craig Harrison25eb0f32012-08-23 16:48:49 -0700299
300
Chris Masoneb61b4052012-04-30 14:35:28 -0700301class Timings(object):
302 """Timings for important events during a suite.
303
304 All timestamps are datetime.datetime objects.
305
306 @var suite_start_time: the time the suite started.
Chris Masoneb61b4052012-04-30 14:35:28 -0700307 @var tests_start_time: the time the first test started running.
308 """
beeps6f02d192013-03-22 13:15:49 -0700309
310 # Recorded in create_suite_job as we're staging the components of a
311 # build on the devserver. Only the artifacts necessary to start
312 # installing images onto DUT's will be staged when we record
313 # payload_end_time, the remaining artifacts are downloaded after we kick
314 # off the reimaging job, at which point we record artifact_end_time.
Chris Masonea8066a92012-05-01 16:52:31 -0700315 download_start_time = None
316 payload_end_time = None
317 artifact_end_time = None
beeps6f02d192013-03-22 13:15:49 -0700318
319 # The test_start_time, but taken off the view that corresponds to the
320 # suite instead of an individual test.
Chris Masoneb61b4052012-04-30 14:35:28 -0700321 suite_start_time = None
beeps6f02d192013-03-22 13:15:49 -0700322
beeps6f02d192013-03-22 13:15:49 -0700323 # Earliest and Latest tests in the set of TestViews passed to us.
Chris Masoneb61b4052012-04-30 14:35:28 -0700324 tests_start_time = None
325 tests_end_time = None
326
327
beeps6f02d192013-03-22 13:15:49 -0700328 def _GetDatetime(self, timing_string, timing_string_format):
329 """
330 Formats the timing_string according to the timing_string_format.
331
332 @param timing_string: A datetime timing string.
333 @param timing_string_format: Format of the time in timing_string.
334 @return: A datetime object for the given timing string.
335 """
336 try:
337 return datetime.strptime(timing_string, timing_string_format)
338 except TypeError:
339 return None
340
341
Chris Masoned9f13c52012-08-29 10:37:08 -0700342 def RecordTiming(self, view):
343 """Given a test report view, extract and record pertinent time info.
Chris Masoneb61b4052012-04-30 14:35:28 -0700344
345 get_detailed_test_views() returns a list of entries that provide
346 info about the various parts of a suite run. This method can take
347 any one of these entries and look up timestamp info we might want
348 and record it.
349
Chris Masonecfa7efc2012-09-06 16:00:07 -0700350 If timestamps are unavailable, datetime.datetime.min/max will be used.
351
Chris Masoned9f13c52012-08-29 10:37:08 -0700352 @param view: a view dict, as returned by get_detailed_test_views().
Chris Masoneb61b4052012-04-30 14:35:28 -0700353 """
Chris Masonecfa7efc2012-09-06 16:00:07 -0700354 start_candidate = datetime.min
355 end_candidate = datetime.max
356 if view['test_started_time']:
357 start_candidate = datetime.strptime(view['test_started_time'],
358 job_status.TIME_FMT)
359 if view['test_finished_time']:
360 end_candidate = datetime.strptime(view['test_finished_time'],
361 job_status.TIME_FMT)
362
Chris Masoned9f13c52012-08-29 10:37:08 -0700363 if job_status.view_is_for_suite_prep(view):
Chris Masoneb61b4052012-04-30 14:35:28 -0700364 self.suite_start_time = start_candidate
Chris Masoneb61b4052012-04-30 14:35:28 -0700365 else:
366 self._UpdateFirstTestStartTime(start_candidate)
367 self._UpdateLastTestEndTime(end_candidate)
Chris Masoned9f13c52012-08-29 10:37:08 -0700368 if 'job_keyvals' in view:
369 keyvals = view['job_keyvals']
beeps6f02d192013-03-22 13:15:49 -0700370 self.download_start_time = self._GetDatetime(
371 keyvals.get(constants.DOWNLOAD_STARTED_TIME),
372 job_status.TIME_FMT)
373
374 self.payload_end_time = self._GetDatetime(
375 keyvals.get(constants.PAYLOAD_FINISHED_TIME),
376 job_status.TIME_FMT)
377
378 self.artifact_end_time = self._GetDatetime(
379 keyvals.get(constants.ARTIFACT_FINISHED_TIME),
380 job_status.TIME_FMT)
Chris Masone44e4d6c2012-08-15 14:25:53 -0700381
Chris Masoneb61b4052012-04-30 14:35:28 -0700382
383 def _UpdateFirstTestStartTime(self, candidate):
384 """Update self.tests_start_time, iff candidate is an earlier time.
385
386 @param candidate: a datetime.datetime object.
387 """
388 if not self.tests_start_time or candidate < self.tests_start_time:
389 self.tests_start_time = candidate
390
391
392 def _UpdateLastTestEndTime(self, candidate):
393 """Update self.tests_end_time, iff candidate is a later time.
394
395 @param candidate: a datetime.datetime object.
396 """
397 if not self.tests_end_time or candidate > self.tests_end_time:
398 self.tests_end_time = candidate
399
400
401 def __str__(self):
402 return ('\n'
403 'Suite timings:\n'
Chris Masonea8066a92012-05-01 16:52:31 -0700404 'Downloads started at %s\n'
405 'Payload downloads ended at %s\n'
Chris Masoneb61b4052012-04-30 14:35:28 -0700406 'Suite started at %s\n'
Chris Masonea8066a92012-05-01 16:52:31 -0700407 'Artifact downloads ended (at latest) at %s\n'
Chris Masoneb61b4052012-04-30 14:35:28 -0700408 'Testing started at %s\n'
Chris Masonea8066a92012-05-01 16:52:31 -0700409 'Testing ended at %s\n' % (self.download_start_time,
410 self.payload_end_time,
411 self.suite_start_time,
Chris Masonea8066a92012-05-01 16:52:31 -0700412 self.artifact_end_time,
Chris Masoneb61b4052012-04-30 14:35:28 -0700413 self.tests_start_time,
414 self.tests_end_time))
415
416
beeps6f02d192013-03-22 13:15:49 -0700417 def _GetDataKeyForStatsd(self, suite, build, board):
418 """
419 Constructs the key used for logging statsd timing data.
420
421 @param suite: scheduled suite that we want to record the results of.
422 @param build: The build string. This string should have a consistent
423 format eg: x86-mario-release/R26-3570.0.0. If the format of this
424 string changes such that we can't determine build_type or branch
425 we give up and use the parametes we're sure of instead (suite,
426 board). eg:
427 1. build = x86-alex-pgo-release/R26-3570.0.0
428 branch = 26
429 build_type = pgo-release
430 2. build = lumpy-paladin/R28-3993.0.0-rc5
431 branch = 28
432 build_type = paladin
433 @param board: The board that this suite ran on.
434 @return: The key used to log timing information in statsd.
435 """
436 try:
Fang Deng3197b392013-06-26 11:42:02 -0700437 _board, build_type, branch = site_utils.ParseBuildName(build)[:3]
438 except site_utils.ParseBuildNameException as e:
beeps6f02d192013-03-22 13:15:49 -0700439 logging.error(str(e))
440 branch = 'Unknown'
441 build_type = 'Unknown'
442 else:
443 embedded_str = re.search(r'x86-\w+-(.*)', _board)
444 if embedded_str:
445 build_type = embedded_str.group(1) + '-' + build_type
446
447 data_key_dict = {
448 'board': board,
449 'branch': branch,
450 'build_type': build_type,
451 'suite': suite,
452 }
453 return ('run_suite.%(board)s.%(build_type)s.%(branch)s.%(suite)s'
454 % data_key_dict)
455
456
457 def SendResultsToStatsd(self, suite, build, board):
458 """
459 Sends data to statsd.
460
461 1. Makes a data_key of the form: run_suite.$board.$branch.$suite
462 eg: stats/gauges/<hostname>/run_suite/<board>/<branch>/<suite>/
463 2. Computes timings for several start and end event pairs.
Alex Miller9a1987a2013-08-21 15:51:16 -0700464 3. Sends all timing values to statsd.
beeps6f02d192013-03-22 13:15:49 -0700465
466 @param suite: scheduled suite that we want to record the results of.
467 @param build: the build that this suite ran on.
468 eg: 'lumpy-release/R26-3570.0.0'
469 @param board: the board that this suite ran on.
470 """
471 if sys.version_info < (2, 7):
472 logging.error('Sending run_suite perf data to statsd requires'
473 'python 2.7 or greater.')
474 return
475
476 data_key = self._GetDataKeyForStatsd(suite, build, board)
477
478 # Since we don't want to try subtracting corrupted datetime values
479 # we catch TypeErrors in _GetDatetime and insert None instead. This
480 # means that even if, say, keyvals.get(constants.ARTIFACT_FINISHED_TIME)
481 # returns a corrupt value the member artifact_end_time is set to None.
482 if self.download_start_time:
483 if self.payload_end_time:
484 stats.Timer(data_key).send('payload_download_time',
485 (self.payload_end_time -
486 self.download_start_time).total_seconds())
487
488 if self.artifact_end_time:
489 stats.Timer(data_key).send('artifact_download_time',
490 (self.artifact_end_time -
491 self.download_start_time).total_seconds())
492
493 if self.tests_end_time:
494 if self.suite_start_time:
495 stats.Timer(data_key).send('suite_run_time',
496 (self.tests_end_time -
497 self.suite_start_time).total_seconds())
498
499 if self.tests_start_time:
500 stats.Timer(data_key).send('tests_run_time',
501 (self.tests_end_time -
502 self.tests_start_time).total_seconds())
503
beeps6f02d192013-03-22 13:15:49 -0700504
beeps619189a2013-04-10 20:56:09 -0700505def _full_test_name(job_id, view, build, suite):
506 """
507 Generates the full test name for printing to logs and generating a link to
508 the results.
Craig Harrisond8451572012-08-31 10:29:33 -0700509
510 @param job_id: the job id.
511 @param view: the view for which we are generating the name.
beeps619189a2013-04-10 20:56:09 -0700512 @param build: the build for this invocation of run_suite.
513 @param suite: the suite for this invocation of run_suite.
Craig Harrisond8451572012-08-31 10:29:33 -0700514 @return The test name, possibly with a descriptive prefix appended.
515 """
beeps619189a2013-04-10 20:56:09 -0700516 experimental, test_name = get_view_info(job_id, view, build, suite)[1:]
517
518 # If an experimental test is aborted get_view_info returns a name which
519 # includes the prefix.
520 prefix = constants.EXPERIMENTAL_PREFIX if (experimental and
521 not test_name.startswith(constants.EXPERIMENTAL_PREFIX)) else ''
522 return prefix + test_name
Craig Harrisond8451572012-08-31 10:29:33 -0700523
524
Alex Millerc7a59522013-10-30 15:18:57 -0700525_DEFAULT_AUTOTEST_INSTANCE = CONFIG.get_config_value(
526 'SERVER', 'hostname', type=str)
527
528
529def instance_for_pool(pool_name):
530 """
531 Return the hostname of the server that should be used to service a suite
532 for the specified pool.
533
534 @param pool_name: The pool (without 'pool:' to schedule the suite against.
535 @return: The correct host that should be used to service this suite run.
536 """
537 return CONFIG.get_config_value(
538 'POOL_INSTANCE_SHARDING', pool_name,
539 default=_DEFAULT_AUTOTEST_INSTANCE)
540
541
Chris Masone24b80f12012-02-14 14:18:01 -0800542def main():
Aviv Keshet1480c4a2013-03-21 16:38:31 -0700543 """
544 Entry point for run_suite script.
545 """
Chris Masone24b80f12012-02-14 14:18:01 -0800546 parser, options, args = parse_options()
Chris Masone3a850642012-07-11 11:11:18 -0700547 log_name = 'run_suite-default.log'
Chris Masone986459e2012-04-11 11:36:48 -0700548 if not options.mock_job_id:
Zdenek Behan77290c32012-06-26 17:39:47 +0200549 if args:
550 print 'Unknown arguments: ' + str(args)
551 parser.print_help()
552 return
553 if not options.build:
554 print 'Need to specify which build to use'
555 parser.print_help()
556 return
557 if not options.board:
558 print 'Need to specify board'
559 parser.print_help()
560 return
561 if not options.name:
562 print 'Need to specify suite name'
Chris Masone986459e2012-04-11 11:36:48 -0700563 parser.print_help()
564 return
Chris Masone3a850642012-07-11 11:11:18 -0700565 # convert build name from containing / to containing only _
566 log_name = 'run_suite-%s.log' % options.build.replace('/', '_')
567 log_dir = os.path.join(common.autotest_dir, 'logs')
568 if os.path.exists(log_dir):
569 log_name = os.path.join(log_dir, log_name)
Alex Miller8e75d0d2012-07-31 15:13:32 -0700570 if options.num is not None and options.num < 1:
571 print 'Number of machines must be more than 0, if specified.'
572 parser.print_help()
573 return
Alex Millerab33ddb2012-10-03 12:56:02 -0700574 if options.no_wait != 'True' and options.no_wait != 'False':
575 print 'Please specify "True" or "False" for --no_wait.'
576 parser.print_help()
577 return
578 if options.file_bugs != 'True' and options.file_bugs != 'False':
579 print 'Please specify "True" or "False" for --file_bugs.'
580 parser.print_help()
581 return
Alex Miller88762a82013-09-04 15:41:28 -0700582
583 try:
584 priority = int(options.priority)
585 except ValueError:
586 try:
587 priority = priorities.Priority.get_value(options.priority)
588 except AttributeError:
589 print 'Unknown priority level %s. Try one of %s.' % (
590 options.priority, ', '.join(priorities.Priority.names))
591
Scott Zawalski94457b72012-07-02 18:45:07 -0400592 setup_logging(logfile=log_name)
Chris Masonedfa0beba2012-03-19 11:41:47 -0700593
Simran Basi22aa9fe2012-12-07 16:37:09 -0800594 try:
Dan Shia02181f2013-01-29 14:03:32 -0800595 if not options.bypass_labstatus:
596 utils.check_lab_status(options.board)
Simran Basi41bfae42013-01-09 10:50:47 -0800597 except (error.LabIsDownException, error.BoardIsDisabledException) as e:
598 logging.debug('Error Message: %s', e)
Simran Basi22aa9fe2012-12-07 16:37:09 -0800599 print str(e)
Aviv Keshet1480c4a2013-03-21 16:38:31 -0700600 return RETURN_CODES.WARNING
Simran Basi22aa9fe2012-12-07 16:37:09 -0800601
Alex Millerc7a59522013-10-30 15:18:57 -0700602 instance_server = instance_for_pool(options.pool)
603 afe = frontend_wrappers.RetryingAFE(server=instance_server,
604 timeout_min=options.timeout_min,
Chris Masone8ac66712012-02-15 14:21:02 -0800605 delay_sec=options.delay_sec)
Alex Millerc7a59522013-10-30 15:18:57 -0700606 logging.info('Autotest instance: %s', instance_server)
Chris Masone359c0fd2012-03-13 15:18:59 -0700607
Alex Millerab33ddb2012-10-03 12:56:02 -0700608 wait = (options.no_wait == 'False')
609 file_bugs = (options.file_bugs == 'True')
Chris Masone986459e2012-04-11 11:36:48 -0700610 if options.mock_job_id:
611 job_id = int(options.mock_job_id)
612 else:
Alex Miller8e75d0d2012-07-31 15:13:32 -0700613 job_id = afe.run('create_suite_job', suite_name=options.name,
614 board=options.board, build=options.build,
Alex Millerf43d0eb2012-10-01 13:43:13 -0700615 check_hosts=wait, pool=options.pool, num=options.num,
Aviv Keshet7cd12312013-07-25 10:25:55 -0700616 file_bugs=file_bugs, priority=priority,
617 suite_args=options.suite_args)
Alex Millerc7a59522013-10-30 15:18:57 -0700618 TKO = frontend_wrappers.RetryingTKO(server=instance_server,
619 timeout_min=options.timeout_min,
Chris Masone8ac66712012-02-15 14:21:02 -0800620 delay_sec=options.delay_sec)
Scott Zawalski94457b72012-07-02 18:45:07 -0400621 logging.info('Started suite job: %s', job_id)
Simran Basi22aa9fe2012-12-07 16:37:09 -0800622
Aviv Keshet1480c4a2013-03-21 16:38:31 -0700623 code = RETURN_CODES.OK
J. Richard Barnette712eb402013-08-13 18:03:00 -0700624 if wait:
625 while not afe.get_jobs(id=job_id, finished=True):
Chris Masone24b80f12012-02-14 14:18:01 -0800626 time.sleep(1)
J. Richard Barnette712eb402013-08-13 18:03:00 -0700627
Scott Zawalski0acfe112012-03-06 09:21:44 -0500628 views = TKO.run('get_detailed_test_views', afe_job_id=job_id)
Dan Shif327b912013-07-18 15:35:05 -0700629 # The intended behavior is to refrain from recording stats if the suite
630 # was aborted (either by a user or through the golo rpc). Since all the
631 # views associated with the afe_job_id of the suite contain the keyvals
632 # of the suite and not the individual tests themselves, we can achieve
633 # this without digging through the views.
634 is_aborted = any([view['job_keyvals'].get('aborted_by')
635 for view in views])
beeps619189a2013-04-10 20:56:09 -0700636 width = max((len(_full_test_name(job_id, view, options.build,
637 options.name)) for view in views)) + 3
Scott Zawalski0acfe112012-03-06 09:21:44 -0500638
Chris Masoned9f13c52012-08-29 10:37:08 -0700639 relevant_views = filter(job_status.view_is_relevant, views)
Scott Zawalski0acfe112012-03-06 09:21:44 -0500640 if not relevant_views:
Zdenek Behan150fbd62012-04-06 17:20:01 +0200641 # The main suite job most likely failed in SERVER_JOB.
642 relevant_views = views
Scott Zawalski0acfe112012-03-06 09:21:44 -0500643
Chris Masoneb61b4052012-04-30 14:35:28 -0700644 timings = Timings()
Craig Harrison25eb0f32012-08-23 16:48:49 -0700645 web_links = []
646 buildbot_links = []
Chris Masoned9f13c52012-08-29 10:37:08 -0700647 for view in relevant_views:
648 timings.RecordTiming(view)
649 if job_status.view_is_for_suite_prep(view):
650 view['test_name'] = 'Suite prep'
Chris Masone3a850642012-07-11 11:11:18 -0700651
beeps619189a2013-04-10 20:56:09 -0700652 job_name, experimental = get_view_info(job_id, view, options.build,
653 options.name)[:2]
654 test_view = _full_test_name(job_id, view, options.build,
655 options.name).ljust(width)
Chris Masoned9f13c52012-08-29 10:37:08 -0700656 logging.info("%s%s", test_view, get_pretty_status(view['status']))
beeps8ead53c2013-04-26 19:12:46 -0700657
J. Richard Barnettee7b98bb2013-08-21 16:34:16 -0700658 # It's important that we use the test name in the view
659 # and not the name returned by full_test_name, as this
660 # was the name inserted after the test ran, e.g. for an
661 # aborted test full_test_name will return
662 # 'experimental_testname' but the view and the bug_id
663 # keyval will use '/build/suite/experimental_testname'.
J. Richard Barnetteb9c911d2013-08-23 11:24:21 -0700664 bug_info = tools.get_test_failure_bug_info(
J. Richard Barnettee7b98bb2013-08-21 16:34:16 -0700665 view['job_keyvals'], view['test_name'])
beeps8ead53c2013-04-26 19:12:46 -0700666
Alex Millerc7a59522013-10-30 15:18:57 -0700667 link = LogLink(test_view, instance_server, job_name, bug_info)
Craig Harrison25eb0f32012-08-23 16:48:49 -0700668 web_links.append(link)
Chris Masoned9f13c52012-08-29 10:37:08 -0700669
J. Richard Barnette02f815e2013-08-22 16:28:32 -0700670 # Don't show links on the buildbot waterfall for tests with
671 # GOOD status.
Chris Masoned9f13c52012-08-29 10:37:08 -0700672 if view['status'] != 'GOOD':
673 logging.info("%s %s: %s", test_view, view['status'],
674 view['reason'])
Chris Masone8906ab12012-07-23 15:37:56 -0700675 if view['status'] == 'TEST_NA':
676 # Didn't run; nothing to do here!
677 continue
J. Richard Barnette02f815e2013-08-22 16:28:32 -0700678 buildbot_links.append(link)
Aviv Keshet1480c4a2013-03-21 16:38:31 -0700679 if code == RETURN_CODES.ERROR:
Zdenek Behan150fbd62012-04-06 17:20:01 +0200680 # Failed already, no need to worry further.
681 continue
beeps619189a2013-04-10 20:56:09 -0700682
683 # Any non experimental test that has a status other than WARN
684 # or GOOD will result in the tree closing. Experimental tests
685 # will not close the tree, even if they have been aborted.
beepsb8117ff2013-10-01 13:50:07 -0700686 if not experimental:
687 if view['status'] == 'WARN':
688 code = RETURN_CODES.WARNING
689 elif is_fail_status(view['status']):
690 code = RETURN_CODES.ERROR
Aviv Keshet1480c4a2013-03-21 16:38:31 -0700691
Dan Shif327b912013-07-18 15:35:05 -0700692 # Do not record stats for aborted suites.
693 if not is_aborted:
694 timings.SendResultsToStatsd(options.name, options.build,
695 options.board)
Scott Zawalski94457b72012-07-02 18:45:07 -0400696 logging.info(timings)
Craig Harrison25eb0f32012-08-23 16:48:49 -0700697 logging.info('\n'
698 'Links to test logs:')
699 for link in web_links:
Craig Harrisond8451572012-08-31 10:29:33 -0700700 logging.info(link.GenerateTextLink())
Aviv Keshet1480c4a2013-03-21 16:38:31 -0700701
702 try:
703 returnmessage = RETURN_CODES.get_string(code)
704 except ValueError:
705 returnmessage = 'UNKNOWN'
706 logging.info('\n'
707 'Will return from run_suite with status: %s',
708 returnmessage)
709
Craig Harrison25eb0f32012-08-23 16:48:49 -0700710 logging.info('\n'
711 'Output below this line is for buildbot consumption:')
712 for link in buildbot_links:
713 logging.info(link.GenerateBuildbotLink())
Chris Masoned5939fe2012-03-13 10:11:06 -0700714 else:
Scott Zawalski94457b72012-07-02 18:45:07 -0400715 logging.info('Created suite job: %r', job_id)
Alex Millera05498f2013-11-01 16:16:21 -0700716 link = LogLink(options.name, instance_server,
717 '%s-%s' % (job_id, getpass.getuser()))
Craig Harrison25eb0f32012-08-23 16:48:49 -0700718 logging.info(link.GenerateBuildbotLink())
Scott Zawalski94457b72012-07-02 18:45:07 -0400719 logging.info('--no_wait specified; Exiting.')
Chris Masone24b80f12012-02-14 14:18:01 -0800720 return code
721
722if __name__ == "__main__":
723 sys.exit(main())