blob: e86fe12012155f0bc426bc59958ed711cf66906b [file] [log] [blame]
Chris Masone24b80f12012-02-14 14:18:01 -08001#!/usr/bin/python
2#
3# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
4# Use of this source code is governed by a BSD-style license that can be
5# found in the LICENSE file.
6
Fang Deng5a43be62014-05-07 17:17:04 -07007
Chris Masone24b80f12012-02-14 14:18:01 -08008"""Tool for running suites of tests and waiting for completion.
9
Fang Deng5a43be62014-05-07 17:17:04 -070010The desired test suite will be scheduled with autotest. By default,
Chris Masone24b80f12012-02-14 14:18:01 -080011this tool will block until the job is complete, printing a summary
12at the end. Error conditions result in exceptions.
13
14This is intended for use only with Chrome OS test suits that leverage the
15dynamic suite infrastructure in server/cros/dynamic_suite.py.
Fang Deng5a43be62014-05-07 17:17:04 -070016
17This script exits with one of the following codes:
180 - OK: Suite finished successfully
191 - ERROR: Test(s) failed, or hits its own timeout
Fang Dengaeab6172014-05-07 17:17:04 -0700202 - WARNING: Test(s) raised a warning or passed on retry, none failed/timed out.
Fang Deng5a43be62014-05-07 17:17:04 -0700213 - INFRA_FAILURE: Infrastructure related issues, e.g.
22 * Lab is down
23 * Too many duts (defined as a constant) in repair failed status
24 * Suite job issues, like bug in dynamic suite,
25 user aborted the suite, lose a drone/all devservers/rpc server,
26 0 tests ran, etc.
274 - SUITE_TIMEOUT: Suite timed out, some tests ran,
28 none failed by the time the suite job was aborted. This will cover,
29 but not limited to, the following cases:
30 * A devserver failure that manifests as a timeout
31 * No DUTs available midway through a suite
32 * Provision/Reset/Cleanup took longer time than expected for new image
33 * A regression in scheduler tick time.
Chris Masone24b80f12012-02-14 14:18:01 -080034"""
35
Fang Deng5a43be62014-05-07 17:17:04 -070036
Prashanth B923ca262014-03-14 12:36:29 -070037import datetime as datetime_base
Fang Dengdd20e452014-04-07 15:39:47 -070038import getpass, logging, optparse, os, re, sys, time
Chris Masonecfa7efc2012-09-06 16:00:07 -070039from datetime import datetime
40
Chris Masone24b80f12012-02-14 14:18:01 -080041import common
Chris Masonecfa7efc2012-09-06 16:00:07 -070042
Fang Deng5a43be62014-05-07 17:17:04 -070043from autotest_lib.client.common_lib import error
J. Richard Barnette3cbd76b2013-11-27 12:11:25 -080044from autotest_lib.client.common_lib import global_config, enum
45from autotest_lib.client.common_lib import priorities
Michael Liangda8c60a2014-06-03 13:24:51 -070046from autotest_lib.client.common_lib.cros.graphite import stats
Prashanth B923ca262014-03-14 12:36:29 -070047from autotest_lib.frontend.afe.json_rpc import proxy
J. Richard Barnette3cbd76b2013-11-27 12:11:25 -080048from autotest_lib.server import utils
Chris Masone44e4d6c2012-08-15 14:25:53 -070049from autotest_lib.server.cros.dynamic_suite import constants
Chris Masoneb4935552012-08-14 12:05:54 -070050from autotest_lib.server.cros.dynamic_suite import frontend_wrappers
51from autotest_lib.server.cros.dynamic_suite import job_status
Prashanth B923ca262014-03-14 12:36:29 -070052from autotest_lib.server.cros.dynamic_suite import reporting_utils
J. Richard Barnettee7b98bb2013-08-21 16:34:16 -070053from autotest_lib.server.cros.dynamic_suite import tools
Prashanth B923ca262014-03-14 12:36:29 -070054from autotest_lib.site_utils import diagnosis_utils
Chris Masone24b80f12012-02-14 14:18:01 -080055
Chris Masone1120cdf2012-02-27 17:35:07 -080056CONFIG = global_config.global_config
57
Simran Basi22aa9fe2012-12-07 16:37:09 -080058# Return code that will be sent back to autotest_rpc_server.py
Fang Deng5a43be62014-05-07 17:17:04 -070059RETURN_CODES = enum.Enum(
60 'OK', 'ERROR', 'WARNING', 'INFRA_FAILURE', 'SUITE_TIMEOUT')
61# The severity of return code. If multiple codes
62# apply, the script should always return the severest one.
63# E.g. if we have a test failure and the suite also timed out,
64# we should return 'ERROR'.
65SEVERITY = {RETURN_CODES.OK: 0,
66 RETURN_CODES.WARNING: 1,
67 RETURN_CODES.INFRA_FAILURE: 2,
68 RETURN_CODES.SUITE_TIMEOUT: 3,
69 RETURN_CODES.ERROR: 4}
70
71
72def get_worse_code(code1, code2):
Fang Dengaeab6172014-05-07 17:17:04 -070073 """Compare the severity of two codes and return the worse code.
Fang Deng5a43be62014-05-07 17:17:04 -070074
75 @param code1: An enum value of RETURN_CODES
76 @param code2: An enum value of RETURN_CODES
77
Fang Dengaeab6172014-05-07 17:17:04 -070078 @returns: the more severe one between code1 and code2.
Fang Deng5a43be62014-05-07 17:17:04 -070079
80 """
Fang Dengaeab6172014-05-07 17:17:04 -070081 return code1 if SEVERITY[code1] >= SEVERITY[code2] else code2
Simran Basi22aa9fe2012-12-07 16:37:09 -080082
Chris Masonedfa0beba2012-03-19 11:41:47 -070083
Scott Zawalski94457b72012-07-02 18:45:07 -040084def setup_logging(logfile=None):
85 """Setup basic logging with all logging info stripped.
86
87 Calls to logging will only show the message. No severity is logged.
88
89 @param logfile: If specified dump output to a file as well.
90 """
Dan Shi8a2dddd2013-08-09 12:58:44 -070091 # Remove all existing handlers. client/common_lib/logging_config adds
92 # a StreamHandler to logger when modules are imported, e.g.,
93 # autotest_lib.client.bin.utils. A new StreamHandler will be added here to
94 # log only messages, not severity.
95 logging.getLogger().handlers = []
96
Scott Zawalski94457b72012-07-02 18:45:07 -040097 screen_handler = logging.StreamHandler()
98 screen_handler.setFormatter(logging.Formatter('%(message)s'))
99 logging.getLogger().addHandler(screen_handler)
100 logging.getLogger().setLevel(logging.INFO)
101 if logfile:
102 file_handler = logging.FileHandler(logfile)
103 file_handler.setLevel(logging.DEBUG)
104 logging.getLogger().addHandler(file_handler)
Chris Masonedfa0beba2012-03-19 11:41:47 -0700105
106
Chris Masone24b80f12012-02-14 14:18:01 -0800107def parse_options():
Aviv Keshet1480c4a2013-03-21 16:38:31 -0700108 #pylint: disable-msg=C0111
Zdenek Behan77290c32012-06-26 17:39:47 +0200109 usage = "usage: %prog [options]"
Chris Masone24b80f12012-02-14 14:18:01 -0800110 parser = optparse.OptionParser(usage=usage)
111 parser.add_option("-b", "--board", dest="board")
112 parser.add_option("-i", "--build", dest="build")
Chris Masone359c0fd2012-03-13 15:18:59 -0700113 # This should just be a boolean flag, but the autotest "proxy" code
114 # can't handle flags that don't take arguments.
Alex Millerab33ddb2012-10-03 12:56:02 -0700115 parser.add_option("-n", "--no_wait", dest="no_wait", default="False",
116 help='Must pass "True" or "False" if used.')
Alex Miller0032e932013-10-23 12:52:58 -0700117 # If you really want no pool, --pool="" will do it. USE WITH CARE.
118 parser.add_option("-p", "--pool", dest="pool", default="suites")
Chris Masone24b80f12012-02-14 14:18:01 -0800119 parser.add_option("-s", "--suite_name", dest="name")
Simran Basi25effe32013-11-26 13:02:11 -0800120 parser.add_option("-a", "--afe_timeout_mins", dest="afe_timeout_mins",
121 default=30)
122 parser.add_option("-t", "--timeout_mins", dest="timeout_mins",
123 default=1440)
Chris Masone8ac66712012-02-15 14:21:02 -0800124 parser.add_option("-d", "--delay_sec", dest="delay_sec", default=10)
Chris Masone986459e2012-04-11 11:36:48 -0700125 parser.add_option("-m", "--mock_job_id", dest="mock_job_id",
126 help="Skips running suite; creates report for given ID.")
Alex Miller05a2fff2012-09-10 10:14:34 -0700127 parser.add_option("-u", "--num", dest="num", type="int", default=None,
Chris Masone8906ab12012-07-23 15:37:56 -0700128 help="Run on at most NUM machines.")
Alex Millerf43d0eb2012-10-01 13:43:13 -0700129 # Same boolean flag issue applies here.
Alex Millerab33ddb2012-10-03 12:56:02 -0700130 parser.add_option("-f", "--file_bugs", dest="file_bugs", default='False',
131 help='File bugs on test failures. Must pass "True" or '
132 '"False" if used.')
Dan Shia02181f2013-01-29 14:03:32 -0800133 parser.add_option("-l", "--bypass_labstatus", dest="bypass_labstatus",
134 action="store_true", help='Bypass lab status check.')
Alex Miller88762a82013-09-04 15:41:28 -0700135 # We allow either a number or a string for the priority. This way, if you
136 # know what you're doing, one can specify a custom priority level between
137 # other levels.
138 parser.add_option("-r", "--priority", dest="priority",
139 default=priorities.Priority.DEFAULT,
140 action="store", help="Priority of suite")
Fang Deng058860c2014-05-15 15:41:50 -0700141 parser.add_option('--retry', dest='retry', default='False',
142 action='store', help='Enable test retry. '
143 'Must pass "True" or "False" if used.')
Dan Shi8de6d1b2014-06-12 09:10:37 -0700144 parser.add_option('--minimum_duts', dest='minimum_duts', type=int,
145 default=0, action='store',
146 help='Minimum Number of available machines required to '
147 'run the suite. Default is set to 0, which means do not '
148 'force the check of available machines before running '
149 'the suite.')
Aviv Keshet7cd12312013-07-25 10:25:55 -0700150 parser.add_option("--suite_args", dest="suite_args",
151 default=None, action="store",
152 help="Argument string for suite control file.")
Alex Millerf43d0eb2012-10-01 13:43:13 -0700153
Chris Masone24b80f12012-02-14 14:18:01 -0800154 options, args = parser.parse_args()
155 return parser, options, args
156
157
Fang Deng0454e632014-04-07 15:39:47 -0700158def verify_options_and_args(options, args):
Fang Dengdd20e452014-04-07 15:39:47 -0700159 """Verify the validity of options and args.
160
Fang Dengdd20e452014-04-07 15:39:47 -0700161 @param options: The parsed options to verify.
162 @param args: The parsed args to verify.
163
164 @returns: True if verification passes, False otherwise.
165
166 """
167 if not options.mock_job_id:
168 if args:
169 print 'Unknown arguments: ' + str(args)
170 return False
171 if not options.build:
172 print 'Need to specify which build to use'
173 return False
174 if not options.board:
175 print 'Need to specify board'
176 return False
177 if not options.name:
178 print 'Need to specify suite name'
179 return False
180 if options.num is not None and options.num < 1:
181 print 'Number of machines must be more than 0, if specified.'
182 return False
183 if options.no_wait != 'True' and options.no_wait != 'False':
184 print 'Please specify "True" or "False" for --no_wait.'
185 return False
186 if options.file_bugs != 'True' and options.file_bugs != 'False':
187 print 'Please specify "True" or "False" for --file_bugs.'
188 return False
Fang Deng058860c2014-05-15 15:41:50 -0700189 if options.retry != 'True' and options.retry != 'False':
190 print 'Please specify "True" or "False" for --retry'
191 return False
192 if options.no_wait == 'True' and options.retry == 'True':
193 print 'Test retry is not available when using --no_wait=True'
Fang Dengdd20e452014-04-07 15:39:47 -0700194 return True
195
196
Chris Masone24b80f12012-02-14 14:18:01 -0800197def get_pretty_status(status):
Aviv Keshet1480c4a2013-03-21 16:38:31 -0700198 """
199 Converts a status string into a pretty-for-printing string.
200
201 @param status: Status to convert.
202
203 @return: Returns pretty string.
204 GOOD -> [ PASSED ]
205 TEST_NA -> [ INFO ]
206 other -> [ FAILED ]
207 """
Chris Masone24b80f12012-02-14 14:18:01 -0800208 if status == 'GOOD':
209 return '[ PASSED ]'
Chris Masone8906ab12012-07-23 15:37:56 -0700210 elif status == 'TEST_NA':
211 return '[ INFO ]'
Chris Masone24b80f12012-02-14 14:18:01 -0800212 return '[ FAILED ]'
213
Fang Dengdd20e452014-04-07 15:39:47 -0700214
Chris Masone24b80f12012-02-14 14:18:01 -0800215
Craig Harrison25eb0f32012-08-23 16:48:49 -0700216class LogLink(object):
J. Richard Barnetteb9c911d2013-08-23 11:24:21 -0700217 """Information needed to record a link in the logs.
Craig Harrison25eb0f32012-08-23 16:48:49 -0700218
J. Richard Barnetteb9c911d2013-08-23 11:24:21 -0700219 Depending on context and the information provided at
220 construction time, the link may point to either to log files for
221 a job, or to a bug filed for a failure in the job.
Craig Harrison25eb0f32012-08-23 16:48:49 -0700222
J. Richard Barnetteb9c911d2013-08-23 11:24:21 -0700223 @var anchor The link text.
224 @var url The link url.
225 @var bug_id Id of a bug to link to, or None.
226 """
227
228 _BUG_URL_PREFIX = CONFIG.get_config_value('BUG_REPORTING',
229 'tracker_url')
J. Richard Barnetteb9c911d2013-08-23 11:24:21 -0700230 _URL_PATTERN = CONFIG.get_config_value('CROS',
231 'log_url_pattern', type=str)
232
233
Fang Dengaeab6172014-05-07 17:17:04 -0700234 def __init__(self, anchor, server, job_string, bug_info=None, reason=None,
235 retry_count=0):
J. Richard Barnetteb9c911d2013-08-23 11:24:21 -0700236 """Initialize the LogLink by generating the log URL.
237
238 @param anchor The link text.
Alex Millerc7a59522013-10-30 15:18:57 -0700239 @param server The hostname of the server this suite ran on.
J. Richard Barnetteb9c911d2013-08-23 11:24:21 -0700240 @param job_string The job whose logs we'd like to link to.
241 @param bug_info Info about the bug, if one was filed.
Fang Deng53c6ff52014-02-24 17:51:24 -0800242 @param reason A string representing the reason of failure if any.
Fang Dengaeab6172014-05-07 17:17:04 -0700243 @param retry_count How many times the test has been retried.
Craig Harrison25eb0f32012-08-23 16:48:49 -0700244 """
245 self.anchor = anchor
Alex Millerc7a59522013-10-30 15:18:57 -0700246 self.url = self._URL_PATTERN % (server, job_string)
Fang Deng53c6ff52014-02-24 17:51:24 -0800247 self.reason = reason
Fang Dengaeab6172014-05-07 17:17:04 -0700248 self.retry_count = retry_count
J. Richard Barnetteb9c911d2013-08-23 11:24:21 -0700249 if bug_info:
250 self.bug_id, self.bug_count = bug_info
251 else:
252 self.bug_id = None
253 self.bug_count = None
Craig Harrison25eb0f32012-08-23 16:48:49 -0700254
255
256 def GenerateBuildbotLink(self):
J. Richard Barnetteb9c911d2013-08-23 11:24:21 -0700257 """Generate a link formatted to meet buildbot expectations.
258
259 If there is a bug associated with this link, report that;
260 otherwise report a link to the job logs.
Craig Harrison25eb0f32012-08-23 16:48:49 -0700261
262 @return A link formatted for the buildbot log annotator.
263 """
Fang Dengaeab6172014-05-07 17:17:04 -0700264 info_strings = []
265 if self.retry_count > 0:
266 info_strings.append('retry_count: %d' % self.retry_count)
267
J. Richard Barnetteb9c911d2013-08-23 11:24:21 -0700268 if self.bug_id:
269 url = '%s%s' % (self._BUG_URL_PREFIX, self.bug_id)
beepsad4daf82013-09-26 10:07:33 -0700270 if self.bug_count is None:
Fang Dengaeab6172014-05-07 17:17:04 -0700271 bug_info = 'unknown number of reports'
beepsad4daf82013-09-26 10:07:33 -0700272 elif self.bug_count == 1:
Fang Dengaeab6172014-05-07 17:17:04 -0700273 bug_info = 'new report'
J. Richard Barnetteb9c911d2013-08-23 11:24:21 -0700274 else:
Fang Dengaeab6172014-05-07 17:17:04 -0700275 bug_info = '%s reports' % self.bug_count
276 info_strings.append(bug_info)
J. Richard Barnetteb9c911d2013-08-23 11:24:21 -0700277 else:
278 url = self.url
Fang Deng53c6ff52014-02-24 17:51:24 -0800279
280 if self.reason:
Fang Dengaeab6172014-05-07 17:17:04 -0700281 info_strings.append(self.reason.strip())
282
283 if info_strings:
284 info = ', '.join(info_strings)
285 anchor_text = '%(anchor)s: %(info)s' % {
286 'anchor': self.anchor.strip(), 'info': info}
287 else:
288 anchor_text = self.anchor.strip()
Fang Deng53c6ff52014-02-24 17:51:24 -0800289
290 return '@@@STEP_LINK@%s@%s@@@'% (anchor_text, url)
Craig Harrison25eb0f32012-08-23 16:48:49 -0700291
292
Craig Harrisond8451572012-08-31 10:29:33 -0700293 def GenerateTextLink(self):
J. Richard Barnetteb9c911d2013-08-23 11:24:21 -0700294 """Generate a link to the job's logs, for consumption by a human.
Craig Harrison25eb0f32012-08-23 16:48:49 -0700295
Craig Harrisond8451572012-08-31 10:29:33 -0700296 @return A link formatted for human readability.
Craig Harrison25eb0f32012-08-23 16:48:49 -0700297 """
Fang Deng53c6ff52014-02-24 17:51:24 -0800298 return '%s%s' % (self.anchor, self.url)
Craig Harrison25eb0f32012-08-23 16:48:49 -0700299
300
Chris Masoneb61b4052012-04-30 14:35:28 -0700301class Timings(object):
302 """Timings for important events during a suite.
303
304 All timestamps are datetime.datetime objects.
305
Fang Dengdd20e452014-04-07 15:39:47 -0700306 @var suite_job_id: the afe job id of the suite job for which
307 we are recording the timing for.
308 @var download_start_time: the time the devserver starts staging
309 the build artifacts. Recorded in create_suite_job.
310 @var payload_end_time: the time when the artifacts only necessary to start
311 installsing images onto DUT's are staged.
312 Recorded in create_suite_job.
313 @var artifact_end_time: the remaining artifacts are downloaded after we kick
314 off the reimaging job, at which point we record
315 artifact_end_time. Recorded in dynamic_suite.py.
Chris Masoneb61b4052012-04-30 14:35:28 -0700316 @var suite_start_time: the time the suite started.
Chris Masoneb61b4052012-04-30 14:35:28 -0700317 @var tests_start_time: the time the first test started running.
Fang Dengdd20e452014-04-07 15:39:47 -0700318 @var tests_end_time: the time the last test finished running.
Chris Masoneb61b4052012-04-30 14:35:28 -0700319 """
beeps6f02d192013-03-22 13:15:49 -0700320
Fang Dengdd20e452014-04-07 15:39:47 -0700321 def __init__(self, suite_job_id):
322 self.suite_job_id = suite_job_id
323 # Timings related to staging artifacts on devserver.
324 self.download_start_time = None
325 self.payload_end_time = None
326 self.artifact_end_time = None
beeps6f02d192013-03-22 13:15:49 -0700327
Fang Dengdd20e452014-04-07 15:39:47 -0700328 # The test_start_time, but taken off the view that corresponds to the
329 # suite instead of an individual test.
330 self.suite_start_time = None
beeps6f02d192013-03-22 13:15:49 -0700331
Fang Dengdd20e452014-04-07 15:39:47 -0700332 # Earliest and Latest tests in the set of TestViews passed to us.
333 self.tests_start_time = None
334 self.tests_end_time = None
335
Chris Masoneb61b4052012-04-30 14:35:28 -0700336
337
beeps6f02d192013-03-22 13:15:49 -0700338 def _GetDatetime(self, timing_string, timing_string_format):
339 """
340 Formats the timing_string according to the timing_string_format.
341
342 @param timing_string: A datetime timing string.
343 @param timing_string_format: Format of the time in timing_string.
344 @return: A datetime object for the given timing string.
345 """
346 try:
347 return datetime.strptime(timing_string, timing_string_format)
348 except TypeError:
349 return None
350
351
Chris Masoned9f13c52012-08-29 10:37:08 -0700352 def RecordTiming(self, view):
353 """Given a test report view, extract and record pertinent time info.
Chris Masoneb61b4052012-04-30 14:35:28 -0700354
355 get_detailed_test_views() returns a list of entries that provide
356 info about the various parts of a suite run. This method can take
357 any one of these entries and look up timestamp info we might want
358 and record it.
359
Chris Masonecfa7efc2012-09-06 16:00:07 -0700360 If timestamps are unavailable, datetime.datetime.min/max will be used.
361
Fang Dengaeab6172014-05-07 17:17:04 -0700362 @param view: A TestView object.
Chris Masoneb61b4052012-04-30 14:35:28 -0700363 """
Chris Masonecfa7efc2012-09-06 16:00:07 -0700364 start_candidate = datetime.min
365 end_candidate = datetime.max
366 if view['test_started_time']:
367 start_candidate = datetime.strptime(view['test_started_time'],
368 job_status.TIME_FMT)
369 if view['test_finished_time']:
370 end_candidate = datetime.strptime(view['test_finished_time'],
371 job_status.TIME_FMT)
372
Fang Dengaeab6172014-05-07 17:17:04 -0700373 if view.get_testname() == TestView.SUITE_PREP:
Chris Masoneb61b4052012-04-30 14:35:28 -0700374 self.suite_start_time = start_candidate
Chris Masoneb61b4052012-04-30 14:35:28 -0700375 else:
376 self._UpdateFirstTestStartTime(start_candidate)
377 self._UpdateLastTestEndTime(end_candidate)
Fang Dengdd20e452014-04-07 15:39:47 -0700378 if view['afe_job_id'] == self.suite_job_id and 'job_keyvals' in view:
Chris Masoned9f13c52012-08-29 10:37:08 -0700379 keyvals = view['job_keyvals']
beeps6f02d192013-03-22 13:15:49 -0700380 self.download_start_time = self._GetDatetime(
381 keyvals.get(constants.DOWNLOAD_STARTED_TIME),
382 job_status.TIME_FMT)
383
384 self.payload_end_time = self._GetDatetime(
385 keyvals.get(constants.PAYLOAD_FINISHED_TIME),
386 job_status.TIME_FMT)
387
388 self.artifact_end_time = self._GetDatetime(
389 keyvals.get(constants.ARTIFACT_FINISHED_TIME),
390 job_status.TIME_FMT)
Chris Masone44e4d6c2012-08-15 14:25:53 -0700391
Chris Masoneb61b4052012-04-30 14:35:28 -0700392
393 def _UpdateFirstTestStartTime(self, candidate):
394 """Update self.tests_start_time, iff candidate is an earlier time.
395
396 @param candidate: a datetime.datetime object.
397 """
398 if not self.tests_start_time or candidate < self.tests_start_time:
399 self.tests_start_time = candidate
400
401
402 def _UpdateLastTestEndTime(self, candidate):
403 """Update self.tests_end_time, iff candidate is a later time.
404
405 @param candidate: a datetime.datetime object.
406 """
407 if not self.tests_end_time or candidate > self.tests_end_time:
408 self.tests_end_time = candidate
409
410
411 def __str__(self):
412 return ('\n'
413 'Suite timings:\n'
Chris Masonea8066a92012-05-01 16:52:31 -0700414 'Downloads started at %s\n'
415 'Payload downloads ended at %s\n'
Chris Masoneb61b4052012-04-30 14:35:28 -0700416 'Suite started at %s\n'
Chris Masonea8066a92012-05-01 16:52:31 -0700417 'Artifact downloads ended (at latest) at %s\n'
Chris Masoneb61b4052012-04-30 14:35:28 -0700418 'Testing started at %s\n'
Chris Masonea8066a92012-05-01 16:52:31 -0700419 'Testing ended at %s\n' % (self.download_start_time,
420 self.payload_end_time,
421 self.suite_start_time,
Chris Masonea8066a92012-05-01 16:52:31 -0700422 self.artifact_end_time,
Chris Masoneb61b4052012-04-30 14:35:28 -0700423 self.tests_start_time,
424 self.tests_end_time))
425
426
beeps6f02d192013-03-22 13:15:49 -0700427 def _GetDataKeyForStatsd(self, suite, build, board):
428 """
429 Constructs the key used for logging statsd timing data.
430
431 @param suite: scheduled suite that we want to record the results of.
432 @param build: The build string. This string should have a consistent
433 format eg: x86-mario-release/R26-3570.0.0. If the format of this
434 string changes such that we can't determine build_type or branch
435 we give up and use the parametes we're sure of instead (suite,
436 board). eg:
437 1. build = x86-alex-pgo-release/R26-3570.0.0
438 branch = 26
439 build_type = pgo-release
440 2. build = lumpy-paladin/R28-3993.0.0-rc5
441 branch = 28
442 build_type = paladin
443 @param board: The board that this suite ran on.
444 @return: The key used to log timing information in statsd.
445 """
446 try:
J. Richard Barnette3cbd76b2013-11-27 12:11:25 -0800447 _board, build_type, branch = utils.ParseBuildName(build)[:3]
448 except utils.ParseBuildNameException as e:
beeps6f02d192013-03-22 13:15:49 -0700449 logging.error(str(e))
450 branch = 'Unknown'
451 build_type = 'Unknown'
452 else:
453 embedded_str = re.search(r'x86-\w+-(.*)', _board)
454 if embedded_str:
455 build_type = embedded_str.group(1) + '-' + build_type
456
457 data_key_dict = {
458 'board': board,
459 'branch': branch,
460 'build_type': build_type,
461 'suite': suite,
462 }
463 return ('run_suite.%(board)s.%(build_type)s.%(branch)s.%(suite)s'
464 % data_key_dict)
465
466
467 def SendResultsToStatsd(self, suite, build, board):
468 """
469 Sends data to statsd.
470
471 1. Makes a data_key of the form: run_suite.$board.$branch.$suite
472 eg: stats/gauges/<hostname>/run_suite/<board>/<branch>/<suite>/
473 2. Computes timings for several start and end event pairs.
Alex Miller9a1987a2013-08-21 15:51:16 -0700474 3. Sends all timing values to statsd.
beeps6f02d192013-03-22 13:15:49 -0700475
476 @param suite: scheduled suite that we want to record the results of.
477 @param build: the build that this suite ran on.
478 eg: 'lumpy-release/R26-3570.0.0'
479 @param board: the board that this suite ran on.
480 """
481 if sys.version_info < (2, 7):
482 logging.error('Sending run_suite perf data to statsd requires'
483 'python 2.7 or greater.')
484 return
485
486 data_key = self._GetDataKeyForStatsd(suite, build, board)
487
488 # Since we don't want to try subtracting corrupted datetime values
489 # we catch TypeErrors in _GetDatetime and insert None instead. This
490 # means that even if, say, keyvals.get(constants.ARTIFACT_FINISHED_TIME)
491 # returns a corrupt value the member artifact_end_time is set to None.
492 if self.download_start_time:
493 if self.payload_end_time:
494 stats.Timer(data_key).send('payload_download_time',
495 (self.payload_end_time -
496 self.download_start_time).total_seconds())
497
498 if self.artifact_end_time:
499 stats.Timer(data_key).send('artifact_download_time',
500 (self.artifact_end_time -
501 self.download_start_time).total_seconds())
502
503 if self.tests_end_time:
504 if self.suite_start_time:
505 stats.Timer(data_key).send('suite_run_time',
506 (self.tests_end_time -
507 self.suite_start_time).total_seconds())
508
509 if self.tests_start_time:
510 stats.Timer(data_key).send('tests_run_time',
511 (self.tests_end_time -
512 self.tests_start_time).total_seconds())
513
beeps6f02d192013-03-22 13:15:49 -0700514
Alex Millerc7a59522013-10-30 15:18:57 -0700515_DEFAULT_AUTOTEST_INSTANCE = CONFIG.get_config_value(
516 'SERVER', 'hostname', type=str)
517
518
519def instance_for_pool(pool_name):
520 """
521 Return the hostname of the server that should be used to service a suite
522 for the specified pool.
523
524 @param pool_name: The pool (without 'pool:' to schedule the suite against.
525 @return: The correct host that should be used to service this suite run.
526 """
527 return CONFIG.get_config_value(
528 'POOL_INSTANCE_SHARDING', pool_name,
529 default=_DEFAULT_AUTOTEST_INSTANCE)
530
531
Fang Dengaeab6172014-05-07 17:17:04 -0700532class TestView(object):
533 """Represents a test view and provides a set of helper functions."""
534
535
536 SUITE_PREP = 'Suite prep'
537
538
539 def __init__(self, view, suite_job_id, suite_name, build):
540 """Init a TestView object representing a tko test view.
541
542 @param view: A dictionary representing a tko test view.
543 @param suite_job_id: The id of the suite job
544 that kicks off the test.
545 @param suite_name: The name of the suite
546 that the test belongs to.
547 @param build: The build for which the test is run.
548 """
549 self.view = view
550 self.suite_job_id = suite_job_id
551 self.suite_name = suite_name
552 self.build = build
553 self.is_suite_view = suite_job_id == view['afe_job_id']
554 # This is the test name that will be shown in the output.
555 self.testname = None
556
557
558 def __getitem__(self, key):
559 """Overload __getitem__ so that we can still use []
560
561 @param key: A key of the tko test view.
562
563 @returns: The value of an attribute in the view.
564
565 """
566 return self.view[key]
567
568
569 def __setitem__(self, key, item):
570 """Overload __setitem so that we can still use []
571
572 @param key: A key of the tko test view.
573 @param item: A value to set
574
575 """
576 self.view[key] = item
577
578
579 def __iter__(self):
580 """Overload __iter__ so that it supports 'in' operator."""
581 return iter(self.view)
582
583
584 def get_testname(self):
585 """Get test name that should be shown in the output.
586
587 Formalize the test_name we got from the test view.
588
589 Remove 'build/suite' prefix if any. And append 'experimental' prefix
590 for experimental tests if their names do not start with 'experimental'.
591
592 If one runs a test in control file via the following code,
593 job.runtest('my_Test', tag='tag')
594 for most of the cases, view['test_name'] would look like 'my_Test.tag'.
595 If this is the case, this method will just return the original
596 test name, i.e. 'my_Test.tag'.
597
598 There are four special cases.
599 1) A test view is for the suite job's SERVER_JOB.
600 In this case, this method will return 'Suite prep'.
601
602 2) A test view is of a child job and for a SERVER_JOB or CLIENT_JOB.
603 In this case, we will take the job name, remove the build/suite
604 prefix from the job name, and append the rest to 'SERVER_JOB'
605 or 'CLIENT_JOB' as a prefix. So the names returned by this
606 method will look like:
607 'experimental_Telemetry Smoothness Measurement_SERVER_JOB'
608 'experimental_dummy_Pass_SERVER_JOB'
609 'dummy_Fail_SERVER_JOB'
610
611 2) A test view is of a suite job and its status is ABORT.
612 In this case, the view['test_name'] is the child job's name.
613 If it is an experimental test, 'experimental' will be part
614 of the name. For instance,
615 'lumpy-release/R35-5712.0.0/perf_v2/
616 experimental_Telemetry Smoothness Measurement'
617 'lumpy-release/R35-5712.0.0/dummy/experimental_dummy_Pass'
618 'lumpy-release/R35-5712.0.0/dummy/dummy_Fail'
619 The above names will be converted to the following:
620 'experimental_Telemetry Smoothness Measurement'
621 'experimental_dummy_Pass'
622 'dummy_Fail'
623
624 3) A test view's status is of a suite job and its status is TEST_NA.
625 In this case, the view['test_name'] is the NAME field of the control
626 file. If it is an experimental test, 'experimental' will part of
627 the name. For instance,
628 'experimental_Telemetry Smoothness Measurement'
629 'experimental_dummy_Pass'
630 'dummy_Fail'
631 This method will not modify these names.
632
633 @returns: Test name after normalization.
634
635 """
636 if self.testname is not None:
637 return self.testname
638
639 if (self.is_suite_view and
640 self.view['test_name'].startswith('SERVER_JOB')):
641 # Rename suite job's SERVER_JOB to 'Suite prep'.
642 self.testname = self.SUITE_PREP
643 return self.testname
644
645 if (self.view['test_name'].startswith('SERVER_JOB') or
646 self.view['test_name'].startswith('CLIENT_JOB')):
647 # Append job name as a prefix for SERVER_JOB and CLIENT_JOB
648 testname= '%s_%s' % (self.view['job_name'], self.view['test_name'])
649 else:
650 testname = self.view['test_name']
651 experimental = self.is_experimental()
652 # Remove the build and suite name from testname if any.
653 testname = tools.get_test_name(
654 self.build, self.suite_name, testname)
655 # If an experimental test was aborted, testname
656 # would include the 'experimental' prefix already.
657 prefix = constants.EXPERIMENTAL_PREFIX if (
658 experimental and not
659 testname.startswith(constants.EXPERIMENTAL_PREFIX)) else ''
660 self.testname = prefix + testname
661 return self.testname
662
663
664 def is_relevant_suite_view(self):
665 """Checks whether this is a suite view we should care about.
666
667 @returns: True if it is relevant. False otherwise.
668 """
669 return (self.get_testname() == self.SUITE_PREP or
670 (self.is_suite_view and
671 not self.view['test_name'].startswith('CLIENT_JOB') and
672 not self.view['subdir']))
673
674
675 def is_test(self):
676 """Return whether the view is for an actual test.
677
678 @returns True if the view is for an actual test.
679 False if the view is for SERVER_JOB or CLIENT_JOB.
680
681 """
682 return not (self.view['test_name'].startswith('SERVER_JOB') or
683 self.view['test_name'].startswith('CLIENT_JOB'))
684
685
686 def is_retry(self):
687 """Check whether the view is for a retry.
688
689 @returns: True, if the view is for a retry; False otherwise.
690
691 """
692 return self.view['job_keyvals'].get('retry_original_job_id') is not None
693
694
695 def is_experimental(self):
696 """Check whether a test view is for an experimental test.
697
698 @returns: True if it is for an experimental test, False otherwise.
699
700 """
701 return (self.view['job_keyvals'].get('experimental') == 'True' or
702 tools.get_test_name(self.build, self.suite_name,
703 self.view['test_name']).startswith('experimental'))
704
705
706 def is_timeout_and_not_run(self):
707 """Check whether the view is for a test that timed out and did not run.
708
709 A child job that was aborted before it got a chance to
710 run will have a view assoicated to suite job.
711 This mostly happens when a job timed out due to
712 starving for hosts (unless aborted manually by user,
713 which we don't consider here).
714
715 @returns: True if the test represented by the view
716 timed out and did not run; False otherwise.
717
718 """
719 return (self.is_suite_view and self.view['status'] == 'ABORT')
720
721
722 def is_in_fail_status(self):
723 """
724 Check if the given test's status corresponds to a failure.
725
726 @returns: True if the test's status is FAIL or ERROR. False otherwise.
727
728 """
729 # All the statuses tests can have when they fail.
730 return self.view['status'] in ['FAIL', 'ERROR', 'ABORT']
731
732
733 def get_buildbot_link_reason(self):
734 """Generate the buildbot link reason for the test.
735
736 @returns: A string representing the reason.
737
738 """
739 return ('%s: %s' % (self.view['status'], self.view['reason'])
740 if self.view['reason'] else self.view['status'])
741
742
743 def get_job_id_owner_str(self):
744 """Generate the job_id_owner string for a test.
745
746 @returns: A string which looks like 135036-username
747
748 """
749 return '%s-%s' % (self.view['afe_job_id'], getpass.getuser())
750
751
752 def get_bug_info(self, suite_job_keyvals):
753 """Get the bug info from suite_job_keyvals.
754
755 If a bug has been filed for the test, its bug info (bug id and counts)
756 will be stored in the suite job's keyvals. This method attempts to
757 retrieve bug info of the test from |suite_job_keyvals|. It will return
758 None if no bug info is found. No need to check bug info if the view is
759 SUITE_PREP.
760
761 @param suite_job_keyvals: The job keyval dictionary of the suite job.
762 All the bug info about child jobs are stored in
763 suite job's keyvals.
764
765 @returns: None if there is no bug info, or a pair with the
766 id of the bug, and the count of the number of
767 times the bug has been seen.
768
769 """
770 if self.get_testname() == self.SUITE_PREP:
771 return None
772 if (self.view['test_name'].startswith('SERVER_JOB') or
773 self.view['test_name'].startswith('CLIENT_JOB')):
774 # Append job name as a prefix for SERVER_JOB and CLIENT_JOB
775 testname= '%s_%s' % (self.view['job_name'], self.view['test_name'])
776 else:
777 testname = self.view['test_name']
778
779 return tools.get_test_failure_bug_info(
780 suite_job_keyvals, self.view['afe_job_id'],
781 testname)
782
783
784 def should_display_buildbot_link(self):
785 """Check whether a buildbot link should show for this view.
786
787 Show links on the buildbot waterfall if
788 1) it is a normal test that are not in GOOD/TEST_NA status
789 and did not time out before running; or
790 2) this is a retry.
791
792 @returns: True if we should show the buildbot link.
793 False otherwise.
794 """
795 is_bad_status = (self.view['status'] != 'GOOD' and
796 self.view['status'] != 'TEST_NA')
797 is_timeout = self.is_timeout_and_not_run()
798 return (is_bad_status and not is_timeout) or self.is_retry()
799
800
Fang Dengdd20e452014-04-07 15:39:47 -0700801class ResultCollector(object):
802 """Collect test results of a suite.
803
804 Once a suite job has finished, use this class to collect test results.
805 `run` is the core method that is to be called first. Then the caller
806 could retrieve information like return code, return message, is_aborted,
807 and timings by accessing the collector's public attributes. And output
808 the test results and links by calling the 'output_*' methods.
809
810 Here is a overview of what `run` method does.
811
812 1) Collect the suite job's results from tko_test_view_2.
813 For the suite job, we only pull test views without a 'subdir'.
814 A NULL subdir indicates that the test was _not_ executed. This could be
815 that no child job was scheduled for this test or the child job got
816 aborted before starts running.
817 (Note 'SERVER_JOB'/'CLIENT_JOB' are handled specially)
818
819 2) Collect the child jobs' results from tko_test_view_2.
820 For child jobs, we pull all the test views associated with them.
821 (Note 'SERVER_JOB'/'CLIENT_JOB' are handled speically)
822
Fang Dengaeab6172014-05-07 17:17:04 -0700823 3) Generate web and buildbot links.
Fang Dengdd20e452014-04-07 15:39:47 -0700824 4) Compute timings of the suite run.
825 5) Compute the return code based on test results.
826
827 @var _instance_server: The hostname of the server that is used
828 to service the suite.
829 @var _afe: The afe rpc client.
830 @var _tko: The tko rpc client.
831 @var _build: The build for which the suite is run,
832 e.g. 'lumpy-release/R35-5712.0.0'
833 @var _suite_name: The suite name, e.g. 'bvt', 'dummy'.
834 @var _suite_job_id: The job id of the suite for which we are going to
835 collect results.
Fang Dengaeab6172014-05-07 17:17:04 -0700836 @var _suite_views: A list of TestView objects, representing relevant
837 test views of the suite job.
838 @var _child_views: A list of TestView objects, representing test views
839 of the child jobs.
840 @var _test_views: A list of TestView objects, representing all test views
841 from _suite_views and _child_views.
Fang Dengdd20e452014-04-07 15:39:47 -0700842 @var _web_links: A list of web links pointing to the results of jobs.
843 @var _buildbot_links: A list of buildbot links for non-passing tests.
Fang Dengaeab6172014-05-07 17:17:04 -0700844 @var _max_testname_width: Max width of all test names.
Fang Dengdd20e452014-04-07 15:39:47 -0700845 @var return_code: The exit code that should be returned by run_suite.
846 @var return_message: Any message that should be displayed to explain
847 the return code.
848 @var is_aborted: Whether the suite was aborted or not.
849 True, False or None (aborting status is unknown yet)
850 @var timings: A Timing object that records the suite's timings.
851
852 """
853
854
Fang Dengdd20e452014-04-07 15:39:47 -0700855 def __init__(self, instance_server, afe, tko, build,
856 suite_name, suite_job_id):
857 self._instance_server = instance_server
858 self._afe = afe
859 self._tko = tko
860 self._build = build
861 self._suite_name = suite_name
862 self._suite_job_id = suite_job_id
Fang Deng0454e632014-04-07 15:39:47 -0700863 self._suite_views = []
864 self._child_views = []
Fang Dengdd20e452014-04-07 15:39:47 -0700865 self._test_views = []
Fang Dengaeab6172014-05-07 17:17:04 -0700866 self._retry_counts = {}
Fang Dengdd20e452014-04-07 15:39:47 -0700867 self._web_links = []
868 self._buildbot_links = []
Fang Deng0454e632014-04-07 15:39:47 -0700869 self._max_testname_width = 0
Fang Dengdd20e452014-04-07 15:39:47 -0700870 self.return_code = None
Fang Deng0454e632014-04-07 15:39:47 -0700871 self.return_message = ''
Fang Dengdd20e452014-04-07 15:39:47 -0700872 self.is_aborted = None
873 self.timings = None
874
875
Fang Dengdd20e452014-04-07 15:39:47 -0700876 def _fetch_relevant_test_views_of_suite(self):
877 """Fetch relevant test views of the suite job.
878
879 For the suite job, there will be a test view for SERVER_JOB, and views
880 for results of its child jobs. For example, assume we've ceated
881 a suite job (afe_job_id: 40) that runs dummy_Pass, dummy_Fail,
882 dummy_Pass.bluetooth. Assume dummy_Pass was aborted before running while
883 dummy_Path.bluetooth got TEST_NA as no duts have bluetooth.
884 So the suite job's test views would look like
885 _____________________________________________________________________
886 test_idx| job_idx|test_name |subdir |afe_job_id|status
887 10 | 1000 |SERVER_JOB |---- |40 |GOOD
888 11 | 1000 |dummy_Pass |NULL |40 |ABORT
889 12 | 1000 |dummy_Fail.Fail |41-onwer/...|40 |FAIL
890 13 | 1000 |dummy_Fail.Error |42-owner/...|40 |ERROR
891 14 | 1000 |dummy_Pass.bluetooth|NULL |40 |TEST_NA
892
893 For a suite job, we only care about
894 a) The test view for the suite job's SERVER_JOB
895 b) The test views for real tests without a subdir. A NULL subdir
896 indicates that a test didn't get executed.
897 So, for the above example, we only keep test views whose test_idxs
898 are 10, 11, 14.
899
Fang Dengaeab6172014-05-07 17:17:04 -0700900 @returns: A list of TestView objects, representing relevant
901 test views of the suite job.
Fang Dengdd20e452014-04-07 15:39:47 -0700902
903 """
Fang Deng0454e632014-04-07 15:39:47 -0700904 views = self._tko.run(call='get_detailed_test_views',
905 afe_job_id=self._suite_job_id)
Fang Dengdd20e452014-04-07 15:39:47 -0700906 relevant_views = []
907 for v in views:
Fang Dengaeab6172014-05-07 17:17:04 -0700908 v = TestView(v, self._suite_job_id, self._suite_name, self._build)
909 if v.is_relevant_suite_view():
Fang Dengdd20e452014-04-07 15:39:47 -0700910 relevant_views.append(v)
Fang Dengaeab6172014-05-07 17:17:04 -0700911 if v.is_timeout_and_not_run():
912 v['reason'] = 'Timed out, did not run.'
Fang Dengdd20e452014-04-07 15:39:47 -0700913 return relevant_views
914
915
Fang Dengaeab6172014-05-07 17:17:04 -0700916 def _compute_retry_count(self, view):
917 """Return how many times the test has been retried.
918
919 @param view: A TestView instance.
920 @returns: An int value indicating the retry count.
921
922 """
923 old_job = view['job_keyvals'].get('retry_original_job_id')
924 count = 0
925 while old_job:
926 count += 1
927 views = self._tko.run(
928 call='get_detailed_test_views', afe_job_id=old_job)
929 old_job = (views[0]['job_keyvals'].get('retry_original_job_id')
930 if views else None)
931 return count
932
933
Fang Dengdd20e452014-04-07 15:39:47 -0700934 def _fetch_test_views_of_child_jobs(self):
935 """Fetch test views of child jobs.
936
Fang Dengaeab6172014-05-07 17:17:04 -0700937 @returns: A tuple (child_views, retry_counts)
938 child_views is list of TestView objects, representing
939 all valid views. retry_counts is a dictionary that maps
940 test_idx to retry counts. It only stores retry
941 counts that are greater than 0.
Fang Deng0454e632014-04-07 15:39:47 -0700942
Fang Dengdd20e452014-04-07 15:39:47 -0700943 """
944 child_job_ids = set(job.id for job in
945 self._afe.get_jobs(
946 parent_job_id=self._suite_job_id))
947 child_views = []
Fang Dengaeab6172014-05-07 17:17:04 -0700948 retry_counts = {}
Fang Dengdd20e452014-04-07 15:39:47 -0700949 for job_id in child_job_ids:
Fang Dengaeab6172014-05-07 17:17:04 -0700950 views = [TestView(v, self._suite_job_id,
951 self._suite_name, self._build)
952 for v in self._tko.run(
953 call='get_detailed_test_views', afe_job_id=job_id,
954 invalid=0)]
Fang Dengdd20e452014-04-07 15:39:47 -0700955 contains_test_failure = any(
Fang Dengaeab6172014-05-07 17:17:04 -0700956 v.is_test() and v['status'] != 'GOOD' for v in views)
Fang Dengdd20e452014-04-07 15:39:47 -0700957 for v in views:
Fang Dengaeab6172014-05-07 17:17:04 -0700958 if (v.is_test() or
959 v['status'] != 'GOOD' and not contains_test_failure):
960 # For normal test view, just keep it.
961 # For SERVER_JOB or CLIENT_JOB, only keep it
962 # if it fails and no other test failure.
Fang Dengdd20e452014-04-07 15:39:47 -0700963 child_views.append(v)
Fang Dengaeab6172014-05-07 17:17:04 -0700964 retry_count = self._compute_retry_count(v)
965 if retry_count > 0:
966 retry_counts[v['test_idx']] = retry_count
967 return child_views, retry_counts
Fang Dengdd20e452014-04-07 15:39:47 -0700968
969
970 def _generate_web_and_buildbot_links(self):
971 """Generate web links and buildbot links."""
972 # TODO(fdeng): If a job was aborted before it reaches Running
973 # state, we read the test view from the suite job
974 # and thus this method generates a link pointing to the
975 # suite job's page for the aborted job. Need a fix.
976 self._web_links = []
977 self._buildbot_links = []
978 # Bug info are stored in the suite job's keyvals.
979 suite_job_keyvals = self._suite_views[0]['job_keyvals']
980 for v in self._test_views:
Fang Dengaeab6172014-05-07 17:17:04 -0700981 retry_count = self._retry_counts.get(v['test_idx'], 0)
982 bug_info = v.get_bug_info(suite_job_keyvals)
983 job_id_owner = v.get_job_id_owner_str()
Fang Dengdd20e452014-04-07 15:39:47 -0700984 link = LogLink(
Fang Dengaeab6172014-05-07 17:17:04 -0700985 anchor=v.get_testname().ljust(
Fang Dengdd20e452014-04-07 15:39:47 -0700986 self._max_testname_width),
987 server=self._instance_server,
988 job_string=job_id_owner,
Fang Dengaeab6172014-05-07 17:17:04 -0700989 bug_info=bug_info, retry_count=retry_count)
Fang Dengdd20e452014-04-07 15:39:47 -0700990 self._web_links.append(link)
991
Fang Dengaeab6172014-05-07 17:17:04 -0700992 if v.should_display_buildbot_link():
993 link.reason = v.get_buildbot_link_reason()
Fang Dengdd20e452014-04-07 15:39:47 -0700994 self._buildbot_links.append(link)
995
996
997 def _record_timings(self):
998 """Record suite timings."""
999 self.timings = Timings(self._suite_job_id)
1000 for v in self._test_views:
1001 self.timings.RecordTiming(v)
1002
1003
Fang Dengaeab6172014-05-07 17:17:04 -07001004 def _get_return_msg(self, code, tests_passed_after_retry):
1005 """Return the proper message for a given return code.
1006
1007 @param code: An enum value of RETURN_CODES
1008 @param test_passed_after_retry: True/False, indicating
1009 whether there are test(s) that have passed after retry.
1010
1011 @returns: A string, representing the message.
1012
1013 """
1014 if code == RETURN_CODES.INFRA_FAILURE:
1015 return 'Suite job failed'
1016 elif code == RETURN_CODES.SUITE_TIMEOUT:
1017 return ('Some test(s) was aborted before running,'
1018 ' suite must have timed out.')
1019 elif code == RETURN_CODES.WARNING:
1020 if tests_passed_after_retry:
1021 return 'Some test(s) passed after retry.'
1022 else:
1023 return 'Some test(s) raised a warning.'
1024 elif code == RETURN_CODES.ERROR:
1025 return 'Some test(s) failed.'
1026 else:
1027 return ''
1028
1029
Fang Dengdd20e452014-04-07 15:39:47 -07001030 def _compute_return_code(self):
1031 """Compute the exit code based on test results."""
1032 code = RETURN_CODES.OK
Fang Deng5a43be62014-05-07 17:17:04 -07001033 message = ''
Fang Dengaeab6172014-05-07 17:17:04 -07001034 tests_passed_after_retry = False
1035
Fang Dengdd20e452014-04-07 15:39:47 -07001036 for v in self._test_views:
Fang Dengaeab6172014-05-07 17:17:04 -07001037 if v.is_experimental():
Fang Deng5a43be62014-05-07 17:17:04 -07001038 continue
Fang Dengaeab6172014-05-07 17:17:04 -07001039 if (v.get_testname() == TestView.SUITE_PREP
1040 and v.is_in_fail_status()):
Fang Deng5a43be62014-05-07 17:17:04 -07001041 # Suite job failed (with status FAIL/ERROR/ABORT).
1042 # If the suite job was aborted, it may or may not be
1043 # caused by timing out. Here we only categorize it as
Fang Dengaeab6172014-05-07 17:17:04 -07001044 # INFRA_FAILURE which is less severe than SUITE_TIMEOUT.
Fang Deng5a43be62014-05-07 17:17:04 -07001045 # Suite timing out will be diagnosed later using
1046 # other methods which will upgrade the severity accordingly.
Fang Deng5a43be62014-05-07 17:17:04 -07001047 code = get_worse_code(code, RETURN_CODES.INFRA_FAILURE)
Fang Dengaeab6172014-05-07 17:17:04 -07001048 elif v.is_timeout_and_not_run():
Fang Deng5a43be62014-05-07 17:17:04 -07001049 code = get_worse_code(code, RETURN_CODES.SUITE_TIMEOUT)
Fang Deng5a43be62014-05-07 17:17:04 -07001050 elif v['status'] == 'WARN':
1051 code = get_worse_code(code, RETURN_CODES.WARNING)
Fang Dengaeab6172014-05-07 17:17:04 -07001052 elif v.is_in_fail_status():
Fang Deng5a43be62014-05-07 17:17:04 -07001053 code = get_worse_code(code, RETURN_CODES.ERROR)
Fang Dengaeab6172014-05-07 17:17:04 -07001054 elif v.is_retry():
1055 code = get_worse_code(code, RETURN_CODES.WARNING)
1056 tests_passed_after_retry = True
Fang Deng5a43be62014-05-07 17:17:04 -07001057 else:
1058 code = get_worse_code(code, RETURN_CODES.OK)
Fang Dengdd20e452014-04-07 15:39:47 -07001059 self.return_code = code
Fang Dengaeab6172014-05-07 17:17:04 -07001060 self.return_message = self._get_return_msg(
1061 code, tests_passed_after_retry)
Fang Dengdd20e452014-04-07 15:39:47 -07001062
1063
1064 def output_results(self):
1065 """Output test results, timings and web links."""
1066 # Output test results
1067 for v in self._test_views:
Fang Dengaeab6172014-05-07 17:17:04 -07001068 display_name = v.get_testname().ljust(self._max_testname_width)
Fang Dengdd20e452014-04-07 15:39:47 -07001069 logging.info('%s%s', display_name,
1070 get_pretty_status(v['status']))
1071 if v['status'] != 'GOOD':
Fang Dengaeab6172014-05-07 17:17:04 -07001072 logging.info('%s %s: %s', display_name, v['status'],
Fang Dengdd20e452014-04-07 15:39:47 -07001073 v['reason'])
Fang Dengaeab6172014-05-07 17:17:04 -07001074 if v.is_retry():
1075 retry_count = self._retry_counts.get(v['test_idx'], 0)
1076 logging.info('%s retry_count: %s',
1077 display_name, retry_count)
Fang Dengdd20e452014-04-07 15:39:47 -07001078 # Output suite timings
1079 logging.info(self.timings)
1080 # Output links to test logs
1081 logging.info('\nLinks to test logs:')
1082 for link in self._web_links:
1083 logging.info(link.GenerateTextLink())
Fang Deng5a43be62014-05-07 17:17:04 -07001084 logging.info('\n')
Fang Dengdd20e452014-04-07 15:39:47 -07001085
1086
1087 def output_buildbot_links(self):
1088 """Output buildbot links."""
1089 for link in self._buildbot_links:
1090 logging.info(link.GenerateBuildbotLink())
1091
1092
1093 def run(self):
1094 """Collect test results.
1095
1096 This method goes through the following steps:
1097 Fetch relevent test views of the suite job.
1098 Fetch test views of child jobs
1099 Check whether the suite was aborted.
Fang Dengaeab6172014-05-07 17:17:04 -07001100 Generate links.
Fang Dengdd20e452014-04-07 15:39:47 -07001101 Calculate suite timings.
1102 Compute return code based on the test result.
1103
1104 """
1105 self._suite_views = self._fetch_relevant_test_views_of_suite()
Fang Dengaeab6172014-05-07 17:17:04 -07001106 self._child_views, self._retry_counts = (
1107 self._fetch_test_views_of_child_jobs())
Fang Dengdd20e452014-04-07 15:39:47 -07001108 self._test_views = self._suite_views + self._child_views
1109 # For hostless job in Starting status, there is no test view associated.
1110 # This can happen when a suite job in Starting status is aborted. When
1111 # the scheduler hits some limit, e.g., max_hostless_jobs_per_drone,
1112 # max_jobs_started_per_cycle, a suite job can stays in Starting status.
1113 if not self._test_views:
Fang Deng5a43be62014-05-07 17:17:04 -07001114 self.return_code = RETURN_CODES.INFRA_FAILURE
Fang Dengdd20e452014-04-07 15:39:47 -07001115 self.return_message = 'No test view was found.'
1116 return
1117 self.is_aborted = any([view['job_keyvals'].get('aborted_by')
1118 for view in self._suite_views])
Fang Dengaeab6172014-05-07 17:17:04 -07001119 self._max_testname_width = max(
1120 [len(v.get_testname()) for v in self._test_views]) + 3
Fang Dengdd20e452014-04-07 15:39:47 -07001121 self._generate_web_and_buildbot_links()
1122 self._record_timings()
1123 self._compute_return_code()
1124
1125
Chris Masone24b80f12012-02-14 14:18:01 -08001126def main():
Aviv Keshet1480c4a2013-03-21 16:38:31 -07001127 """
1128 Entry point for run_suite script.
1129 """
Chris Masone24b80f12012-02-14 14:18:01 -08001130 parser, options, args = parse_options()
Fang Deng0454e632014-04-07 15:39:47 -07001131 if not verify_options_and_args(options, args):
Fang Dengdd20e452014-04-07 15:39:47 -07001132 parser.print_help()
1133 return
1134
Chris Masone3a850642012-07-11 11:11:18 -07001135 log_name = 'run_suite-default.log'
Chris Masone986459e2012-04-11 11:36:48 -07001136 if not options.mock_job_id:
Chris Masone3a850642012-07-11 11:11:18 -07001137 # convert build name from containing / to containing only _
1138 log_name = 'run_suite-%s.log' % options.build.replace('/', '_')
1139 log_dir = os.path.join(common.autotest_dir, 'logs')
1140 if os.path.exists(log_dir):
1141 log_name = os.path.join(log_dir, log_name)
Alex Miller88762a82013-09-04 15:41:28 -07001142
Fang Dengdd20e452014-04-07 15:39:47 -07001143 setup_logging(logfile=log_name)
Alex Miller88762a82013-09-04 15:41:28 -07001144 try:
1145 priority = int(options.priority)
1146 except ValueError:
1147 try:
1148 priority = priorities.Priority.get_value(options.priority)
1149 except AttributeError:
1150 print 'Unknown priority level %s. Try one of %s.' % (
1151 options.priority, ', '.join(priorities.Priority.names))
1152
Simran Basi22aa9fe2012-12-07 16:37:09 -08001153 try:
Dan Shia02181f2013-01-29 14:03:32 -08001154 if not options.bypass_labstatus:
J. Richard Barnetteabbe0962013-12-10 18:15:44 -08001155 utils.check_lab_status(options.build)
1156 except utils.TestLabException as e:
Alex Millerc3262f02013-11-15 11:27:19 -08001157 logging.warning('Error Message: %s', e)
Fang Deng5a43be62014-05-07 17:17:04 -07001158 return RETURN_CODES.INFRA_FAILURE
Simran Basi22aa9fe2012-12-07 16:37:09 -08001159
Alex Millerc7a59522013-10-30 15:18:57 -07001160 instance_server = instance_for_pool(options.pool)
1161 afe = frontend_wrappers.RetryingAFE(server=instance_server,
Simran Basi25effe32013-11-26 13:02:11 -08001162 timeout_min=options.afe_timeout_mins,
Chris Masone8ac66712012-02-15 14:21:02 -08001163 delay_sec=options.delay_sec)
Alex Millerc7a59522013-10-30 15:18:57 -07001164 logging.info('Autotest instance: %s', instance_server)
Chris Masone359c0fd2012-03-13 15:18:59 -07001165
Dan Shi20952c12014-05-14 17:07:38 -07001166 rpc_helper = diagnosis_utils.RPCHelper(afe)
1167
1168 try:
Dan Shi94234cb2014-05-23 20:04:31 -07001169 rpc_helper.check_dut_availability(options.board, options.pool,
Dan Shi8de6d1b2014-06-12 09:10:37 -07001170 options.minimum_duts)
Dan Shi20952c12014-05-14 17:07:38 -07001171 except utils.TestLabException as e:
1172 logging.warning('Not enough DUTs to run this suite: %s', e)
1173 return RETURN_CODES.INFRA_FAILURE
1174
Dan Shi95122412013-11-12 16:20:33 -08001175 wait = options.no_wait == 'False'
1176 file_bugs = options.file_bugs == 'True'
Fang Deng058860c2014-05-15 15:41:50 -07001177 retry = options.retry == 'True'
Prashanth B923ca262014-03-14 12:36:29 -07001178 logging.info('%s Submitted create_suite_job rpc',
1179 diagnosis_utils.JobTimer.format_time(datetime.now()))
Chris Masone986459e2012-04-11 11:36:48 -07001180 if options.mock_job_id:
1181 job_id = int(options.mock_job_id)
1182 else:
Fang Deng5a43be62014-05-07 17:17:04 -07001183 try:
1184 job_id = afe.run('create_suite_job', name=options.name,
1185 board=options.board, build=options.build,
1186 check_hosts=wait, pool=options.pool,
1187 num=options.num,
1188 file_bugs=file_bugs, priority=priority,
1189 suite_args=options.suite_args,
1190 wait_for_results=wait,
Fang Deng058860c2014-05-15 15:41:50 -07001191 timeout_mins=options.timeout_mins,
1192 job_retry=retry)
Fang Deng5a43be62014-05-07 17:17:04 -07001193 except (error.CrosDynamicSuiteException,
1194 error.RPCException, proxy.JSONRPCException) as e:
1195 logging.warning('Error Message: %s', e)
1196 return RETURN_CODES.INFRA_FAILURE
1197
Prashanth B923ca262014-03-14 12:36:29 -07001198 job_timer = diagnosis_utils.JobTimer(
1199 time.time(), float(options.timeout_mins))
1200 logging.info('%s Created suite job: %s',
1201 job_timer.format_time(job_timer.job_created_time),
1202 reporting_utils.link_job(
1203 job_id, instance_server=instance_server))
1204
Alex Millerc7a59522013-10-30 15:18:57 -07001205 TKO = frontend_wrappers.RetryingTKO(server=instance_server,
Simran Basi25effe32013-11-26 13:02:11 -08001206 timeout_min=options.afe_timeout_mins,
Chris Masone8ac66712012-02-15 14:21:02 -08001207 delay_sec=options.delay_sec)
Aviv Keshet1480c4a2013-03-21 16:38:31 -07001208 code = RETURN_CODES.OK
Dan Shi20952c12014-05-14 17:07:38 -07001209
J. Richard Barnette712eb402013-08-13 18:03:00 -07001210 if wait:
1211 while not afe.get_jobs(id=job_id, finished=True):
Prashanth B923ca262014-03-14 12:36:29 -07001212 # Note that this call logs output, preventing buildbot's
1213 # 9000 second silent timeout from kicking in. Let there be no
1214 # doubt, this is a hack. The timeout is from upstream buildbot and
1215 # this is the easiest work around.
1216 if job_timer.first_past_halftime():
1217 rpc_helper.diagnose_job(job_id)
Prashanth Ba7be2072014-07-15 15:03:21 -07001218 if job_timer.debug_output_timer.poll():
1219 logging.info('The suite job has another %s till timeout.',
Prashanth B923ca262014-03-14 12:36:29 -07001220 job_timer.timeout_hours - job_timer.elapsed_time())
Alex Miller764227d2013-11-15 10:28:56 -08001221 time.sleep(10)
J. Richard Barnette712eb402013-08-13 18:03:00 -07001222
Fang Dengdd20e452014-04-07 15:39:47 -07001223 # Start collecting test results.
1224 collector = ResultCollector(instance_server=instance_server,
1225 afe=afe, tko=TKO, build=options.build,
1226 suite_name=options.name,
1227 suite_job_id=job_id)
1228 collector.run()
1229 # Output test results, timings, web links.
1230 collector.output_results()
Fang Dengdd20e452014-04-07 15:39:47 -07001231 code = collector.return_code
Fang Deng5a43be62014-05-07 17:17:04 -07001232 return_message = collector.return_message
1233 if not options.mock_job_id:
1234 # Send timings to statsd. Do not record stats if the suite was
1235 # aborted (either by a user or through the golo rpc).
1236 # Also do not record stats if is_aborted is None, indicating
1237 # aborting status is unknown yet.
1238 if collector.is_aborted == False:
1239 collector.timings.SendResultsToStatsd(
1240 options.name, options.build, options.board)
1241 # There is a minor race condition here where we might have aborted
1242 # for some reason other than a timeout, and the job_timer thinks
1243 # it's a timeout because of the jitter in waiting for results.
1244 # The consequence would be that run_suite exits with code
1245 # SUITE_TIMEOUT while it should have returned INFRA_FAILURE
1246 # instead, which should happen very rarely.
1247 is_suite_timeout = job_timer.is_suite_timeout()
1248 if collector.is_aborted == True and is_suite_timeout:
1249 # There are two possible cases when a suite times out.
1250 # 1. the suite job was aborted due to timing out
1251 # 2. the suite job succeeded, but some child jobs
1252 # were already aborted before the suite job exited.
1253 # The case 2 was handled by ResultCollector,
1254 # here we handle case 1.
1255 old_code = code
Fang Dengaeab6172014-05-07 17:17:04 -07001256 code = get_worse_code(
1257 code, RETURN_CODES.SUITE_TIMEOUT)
Fang Deng5a43be62014-05-07 17:17:04 -07001258 if old_code != code:
Fang Dengaeab6172014-05-07 17:17:04 -07001259 return_message = 'Suite job timed out.'
Fang Deng5a43be62014-05-07 17:17:04 -07001260 logging.info('Upgrade return code from %s to %s '
1261 'because suite job has timed out.',
1262 RETURN_CODES.get_string(old_code),
1263 RETURN_CODES.get_string(code))
Fang Deng5a43be62014-05-07 17:17:04 -07001264 if is_suite_timeout:
1265 logging.info('\nAttempting to diagnose pool: %s', options.pool)
1266 stats.Counter('run_suite_timeouts').increment()
1267 try:
1268 # Add some jitter to make up for any latency in
1269 # aborting the suite or checking for results.
1270 cutoff = (job_timer.timeout_hours +
1271 datetime_base.timedelta(hours=0.3))
1272 rpc_helper.diagnose_pool(
1273 options.board, options.pool, cutoff)
1274 except proxy.JSONRPCException as e:
1275 logging.warning('Unable to diagnose suite abort.')
1276
1277 # And output return message.
1278 code_str = RETURN_CODES.get_string(code)
1279 logging.info('Will return from run_suite with status: %s', code_str)
1280 if return_message:
1281 logging.info('Reason: %s', return_message)
Prashanth B923ca262014-03-14 12:36:29 -07001282
Fang Dengdd20e452014-04-07 15:39:47 -07001283 logging.info('\nOutput below this line is for buildbot consumption:')
1284 collector.output_buildbot_links()
Chris Masoned5939fe2012-03-13 10:11:06 -07001285 else:
Scott Zawalski94457b72012-07-02 18:45:07 -04001286 logging.info('Created suite job: %r', job_id)
Alex Millera05498f2013-11-01 16:16:21 -07001287 link = LogLink(options.name, instance_server,
1288 '%s-%s' % (job_id, getpass.getuser()))
Craig Harrison25eb0f32012-08-23 16:48:49 -07001289 logging.info(link.GenerateBuildbotLink())
Scott Zawalski94457b72012-07-02 18:45:07 -04001290 logging.info('--no_wait specified; Exiting.')
Chris Masone24b80f12012-02-14 14:18:01 -08001291 return code
1292
Fang Dengdd20e452014-04-07 15:39:47 -07001293
Chris Masone24b80f12012-02-14 14:18:01 -08001294if __name__ == "__main__":
1295 sys.exit(main())