blob: 871983c7f1d94c6dc81a2bc76537ae36922dcbc7 [file] [log] [blame]
Chris Masone24b80f12012-02-14 14:18:01 -08001#!/usr/bin/python
2#
3# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
4# Use of this source code is governed by a BSD-style license that can be
5# found in the LICENSE file.
6
Fang Deng5a43be62014-05-07 17:17:04 -07007
"""Tool for running suites of tests and waiting for completion.

The desired test suite will be scheduled with autotest. By default,
this tool will block until the job is complete, printing a summary
at the end. Error conditions result in exceptions.

This is intended for use only with Chrome OS test suites that leverage the
dynamic suite infrastructure in server/cros/dynamic_suite.py.

This script exits with one of the following codes:
0 - OK: Suite finished successfully
1 - ERROR: Test(s) failed, or hits its own timeout
2 - WARNING: Test(s) raised a warning or passed on retry, none failed/timed out.
3 - INFRA_FAILURE: Infrastructure related issues, e.g.
    * Lab is down
    * Too many duts (defined as a constant) in repair failed status
    * Suite job issues, like bug in dynamic suite,
      user aborted the suite, lose a drone/all devservers/rpc server,
      0 tests ran, etc.
    * provision failed
      TODO(fdeng): crbug.com/413918, reexamine treating all provision
                   failures as INFRA failures.
4 - SUITE_TIMEOUT: Suite timed out, some tests ran,
    none failed by the time the suite job was aborted. This will cover,
    but not limited to, the following cases:
    * A devserver failure that manifests as a timeout
    * No DUTs available midway through a suite
    * Provision/Reset/Cleanup took longer time than expected for new image
    * A regression in scheduler tick time.
5 - BOARD_NOT_AVAILABLE: If there is no host for the requested board/pool.
6 - INVALID_OPTIONS: If options are not valid.
"""
40
Fang Deng5a43be62014-05-07 17:17:04 -070041
Prashanth B923ca262014-03-14 12:36:29 -070042import datetime as datetime_base
Fang Dengcbc01212014-11-25 16:09:46 -080043import getpass, logging, optparse, os, sys, time
Chris Masonecfa7efc2012-09-06 16:00:07 -070044from datetime import datetime
45
Chris Masone24b80f12012-02-14 14:18:01 -080046import common
Fang Deng5a43be62014-05-07 17:17:04 -070047from autotest_lib.client.common_lib import error
J. Richard Barnette3cbd76b2013-11-27 12:11:25 -080048from autotest_lib.client.common_lib import global_config, enum
49from autotest_lib.client.common_lib import priorities
Dan Shidfea3682014-08-10 23:38:40 -070050from autotest_lib.client.common_lib import time_utils
Gabe Black1e1c41b2015-02-04 23:55:15 -080051from autotest_lib.client.common_lib.cros.graphite import autotest_stats
Prashanth B6285f6a2014-05-08 18:01:27 -070052from autotest_lib.client.common_lib.cros import retry
Prashanth B923ca262014-03-14 12:36:29 -070053from autotest_lib.frontend.afe.json_rpc import proxy
J. Richard Barnette3cbd76b2013-11-27 12:11:25 -080054from autotest_lib.server import utils
Chris Masone44e4d6c2012-08-15 14:25:53 -070055from autotest_lib.server.cros.dynamic_suite import constants
Chris Masoneb4935552012-08-14 12:05:54 -070056from autotest_lib.server.cros.dynamic_suite import frontend_wrappers
Prashanth B923ca262014-03-14 12:36:29 -070057from autotest_lib.server.cros.dynamic_suite import reporting_utils
J. Richard Barnettee7b98bb2013-08-21 16:34:16 -070058from autotest_lib.server.cros.dynamic_suite import tools
Prashanth B923ca262014-03-14 12:36:29 -070059from autotest_lib.site_utils import diagnosis_utils
MK Ryu977a9752014-10-21 11:58:09 -070060from autotest_lib.site_utils import job_overhead
61
Chris Masone24b80f12012-02-14 14:18:01 -080062
# Handle on the global autotest configuration.
CONFIG = global_config.global_config

# Pattern that is %-formatted with a test name to build the link to the
# wmatrix retry dashboard.
WMATRIX_RETRY_URL = CONFIG.get_config_value(
        'BUG_REPORTING', 'wmatrix_retry_url')

# Return code that will be sent back to autotest_rpc_server.py
RETURN_CODES = enum.Enum(
        'OK',
        'ERROR',
        'WARNING',
        'INFRA_FAILURE',
        'SUITE_TIMEOUT',
        'BOARD_NOT_AVAILABLE',
        'INVALID_OPTIONS')

# The severity of return code. If multiple codes
# apply, the script should always return the severest one.
# E.g. if we have a test failure and the suite also timed out,
# we should return 'ERROR'.
# The codes below are listed from least to most severe; the position in the
# list is the severity. BOARD_NOT_AVAILABLE and INVALID_OPTIONS have no
# entry here.
SEVERITY = dict(
        (code, rank) for rank, code in enumerate([
                RETURN_CODES.OK,
                RETURN_CODES.WARNING,
                RETURN_CODES.SUITE_TIMEOUT,
                RETURN_CODES.INFRA_FAILURE,
                RETURN_CODES.ERROR]))
Fang Deng5a43be62014-05-07 17:17:04 -070081
82
def get_worse_code(code1, code2):
    """Pick the more severe of two return codes.

    Severity is looked up in SEVERITY. When both codes are equally severe,
    code1 is returned.

    @param code1: An enum value of RETURN_CODES
    @param code2: An enum value of RETURN_CODES

    @returns: Whichever of code1 and code2 has the higher severity.

    """
    first_rank = SEVERITY[code1]
    second_rank = SEVERITY[code2]
    if first_rank < second_rank:
        return code2
    return code1
Simran Basi22aa9fe2012-12-07 16:37:09 -080093
Chris Masonedfa0beba2012-03-19 11:41:47 -070094
def parse_options():
    """Define the command line options of this tool and parse sys.argv.

    Several options that are logically boolean (--no_wait, --file_bugs,
    --retry, --offload_failures_only) take the literal strings "True" or
    "False" instead of being flags, because the autotest "proxy" code
    can't handle flags that don't take arguments.

    @returns: A tuple (parser, options, args): the OptionParser that was
              built, the parsed option values, and the leftover positional
              arguments.

    """
    usage = "usage: %prog [options]"
    parser = optparse.OptionParser(usage=usage)
    parser.add_option("-b", "--board", dest="board")
    parser.add_option("-i", "--build", dest="build")
    parser.add_option("-w", "--web", dest="web", default=None,
                      help="Address of a webserver to receive suite requests.")
    # This should just be a boolean flag, but the autotest "proxy" code
    # can't handle flags that don't take arguments.
    parser.add_option("-n", "--no_wait", dest="no_wait", default="False",
                      help='Must pass "True" or "False" if used.')
    # If you really want no pool, --pool="" will do it. USE WITH CARE.
    parser.add_option("-p", "--pool", dest="pool", default="suites")
    parser.add_option("-s", "--suite_name", dest="name")
    parser.add_option("-a", "--afe_timeout_mins", type="int",
                      dest="afe_timeout_mins", default=30)
    parser.add_option("-t", "--timeout_mins", type="int",
                      dest="timeout_mins", default=1440)
    parser.add_option("-x", "--max_runtime_mins", type="int",
                      dest="max_runtime_mins", default=1440)
    parser.add_option("-d", "--delay_sec", type="int",
                      dest="delay_sec", default=10)
    parser.add_option("-m", "--mock_job_id", dest="mock_job_id",
                      help="Attach to existing job id for already running "
                           "suite, and creates report.")
    # NOTE(akeshet): This looks similar to --no_wait, but behaves differently.
    # --no_wait is passed in to the suite rpc itself and affects the suite,
    # while this does not.
    parser.add_option("-c", "--create_and_return", dest="create_and_return",
                      action="store_true",
                      help="Create the suite and print the job id, then "
                           "finish immediately.")
    parser.add_option("-u", "--num", dest="num", type="int", default=None,
                      help="Run on at most NUM machines.")
    # Same boolean flag issue applies here.
    parser.add_option("-f", "--file_bugs", dest="file_bugs", default='False',
                      help='File bugs on test failures. Must pass "True" or '
                           '"False" if used.')
    parser.add_option("-l", "--bypass_labstatus", dest="bypass_labstatus",
                      action="store_true", help='Bypass lab status check.')
    # We allow either a number or a string for the priority. This way, if you
    # know what you're doing, one can specify a custom priority level between
    # other levels.
    parser.add_option("-r", "--priority", dest="priority",
                      default=priorities.Priority.DEFAULT,
                      action="store", help="Priority of suite")
    parser.add_option('--retry', dest='retry', default='False',
                      action='store', help='Enable test retry. '
                      'Must pass "True" or "False" if used.')
    # The two help fragments are concatenated, so the first one must end
    # with a space.
    parser.add_option('--max_retries', dest='max_retries', default=None,
                      type='int', action='store', help='Maximum retries '
                      'allowed at suite level. No limit if not specified.')
    parser.add_option('--minimum_duts', dest='minimum_duts', type=int,
                      default=0, action='store',
                      help='Check that the pool has at least such many '
                           'healthy machines, otherwise suite will not run. '
                           'Default to 0.')
    parser.add_option('--suite_min_duts', dest='suite_min_duts', type=int,
                      default=0, action='store',
                      help='Preferred minimum number of machines. Scheduler '
                           'will prioritize on getting such many machines for '
                           'the suite when it is competing with another suite '
                           'that has a higher priority but already got minimum '
                           'machines it needs. Default to 0.')
    parser.add_option("--suite_args", dest="suite_args",
                      default=None, action="store",
                      help="Argument string for suite control file.")
    parser.add_option('--offload_failures_only', dest='offload_failures_only',
                      action='store', default='False',
                      help='Only enable gs_offloading for failed tests. '
                           'Successful tests will be deleted. Must pass "True"'
                           ' or "False" if used.')
    parser.add_option('--use_suite_attr', dest='use_suite_attr',
                      action='store_true', default=False,
                      help='Advanced. Run the suite based on ATTRIBUTES of '
                           'control files, rather than SUITE.')
    options, args = parser.parse_args()
    return parser, options, args
174
175
def verify_options_and_args(options, args):
    """Verify the validity of options and args.

    Each failed check prints a one-line explanation to stdout before
    returning.

    @param options: The parsed options to verify.
    @param args: The parsed args to verify.

    @returns: True if verification passes, False otherwise.

    """
    if args:
        print('Unknown arguments: ' + str(args))
        return False

    # Build, board and suite name are always required. When attaching to an
    # existing job with -m, a single combined message is printed instead of
    # the per-option ones.
    if options.mock_job_id and (
            not options.build or not options.name or not options.board):
        print('When using -m, need to specify build, board and suite '
              'name which you have used for creating the original job')
        return False
    if not options.build:
        print('Need to specify which build to use')
        return False
    if not options.board:
        print('Need to specify board')
        return False
    if not options.name:
        print('Need to specify suite name')
        return False

    if options.num is not None and options.num < 1:
        print('Number of machines must be more than 0, if specified.')
        return False

    # These options are strings rather than real flags, so only the two
    # literal spellings are accepted.
    if options.no_wait != 'True' and options.no_wait != 'False':
        print('Please specify "True" or "False" for --no_wait.')
        return False
    if options.file_bugs != 'True' and options.file_bugs != 'False':
        print('Please specify "True" or "False" for --file_bugs.')
        return False
    if options.retry != 'True' and options.retry != 'False':
        print('Please specify "True" or "False" for --retry')
        return False
    if options.retry == 'False' and options.max_retries is not None:
        print('max_retries can only be used with --retry=True')
        return False
    if options.use_suite_attr and options.suite_args is not None:
        print('The new suite control file cannot parse the suite_args: %s. '
              'Please do not specify any suite_args here.' %
              options.suite_args)
        return False
    if options.no_wait == 'True' and options.retry == 'True':
        # Only a message is printed for this combination; verification
        # still passes.
        # NOTE(review): presumably intended as a warning rather than an
        # error -- confirm.
        print('Test retry is not available when using --no_wait=True')
    return True
226
227
def change_options_for_suite_attr(options):
    """Change options to be prepared to run the suite_attr_wrapper.

    If 'use_suite_attr' is specified on the cmd line, it indicates to run the
    new style suite control file, suite_attr_wrapper. This function rewrites
    the options accordingly, in place:
      * options.suite_args is replaced by the string form of a dict whose
        'attr_filter' entry is an attribute boolean expression built from
        the original suite name(s). Any previous value is overwritten.
      * options.name is set to 'suite_attr_wrapper'.

    @param options: The verified options. options.name is either a single
                    suite name string or an iterable of suite names.

    @returns: The changed options (the same object that was passed in).

    """
    # Convert the suite_name to attribute boolean expression. isinstance is
    # used so that a str subclass is treated as one name rather than being
    # iterated character by character.
    if isinstance(options.name, str):
        attr_filter_val = 'suite:%s' % options.name
    else:
        attr_filter_val = ' or '.join(['suite:%s' % x for x in options.name])

    # Change the suite_args to be a dict of arguments for suite_attr_wrapper.
    args_dict = {'attr_filter': attr_filter_val}
    options.suite_args = str(args_dict)
    options.name = 'suite_attr_wrapper'

    return options
255
256
def get_pretty_status(status):
    """
    Map a raw status string onto the bracketed label used when printing.

    @param status: Status to convert.

    @return: The label for the status:
                GOOD    -> [ PASSED ]
                TEST_NA -> [ INFO ]
                other   -> [ FAILED ]
    """
    if status == 'GOOD':
        pretty = '[ PASSED ]'
    elif status == 'TEST_NA':
        pretty = '[ INFO ]'
    else:
        # Anything that is neither a pass nor informational is a failure.
        pretty = '[ FAILED ]'
    return pretty
273
Fang Dengdd20e452014-04-07 15:39:47 -0700274
def GetBuildbotStepLink(anchor_text, url):
    """Build a STEP_LINK annotation in the buildbot format.

    @param anchor_text The link text.
    @param url The url to link to.

    @return The string '@@@STEP_LINK@<anchor_text>@<url>@@@'.
    """
    fields = {'text': anchor_text, 'target': url}
    return '@@@STEP_LINK@%(text)s@%(target)s@@@' % fields
282
Chris Masone24b80f12012-02-14 14:18:01 -0800283
class LogLink(object):
    """Information needed to record a link in the logs.

    Depending on context and the information provided at
    construction time, the link may point to either to log files for
    a job, or to a bug filed for a failure in the job.

    @var anchor The link text.
    @var url The link url.
    @var bug_id Id of a bug to link to, or None.
    """

    # Prefix that a bug id is appended to in order to form a bug link.
    _BUG_URL_PREFIX = CONFIG.get_config_value('BUG_REPORTING',
                                              'tracker_url')
    # Pattern that is %-formatted with (server, job_string) to form the
    # log url.
    _URL_PATTERN = CONFIG.get_config_value('CROS',
                                           'log_url_pattern', type=str)


    @classmethod
    def get_bug_link(cls, bug_id):
        """Generate a bug link for the given bug_id.

        @param bug_id: The id of the bug.
        @return: A link, eg: https://crbug.com/<bug_id>.
        """
        return '%s%s' % (cls._BUG_URL_PREFIX, bug_id)


    def __init__(self, anchor, server, job_string, bug_info=None, reason=None,
                 retry_count=0, testname=None):
        """Initialize the LogLink by generating the log URL.

        @param anchor      The link text.
        @param server      The hostname of the server this suite ran on.
        @param job_string  The job whose logs we'd like to link to.
        @param bug_info    Info about the bug, if one was filed; unpacked
                           as a (bug_id, bug_count) pair.
        @param reason      A string representing the reason of failure if any.
        @param retry_count How many times the test has been retried.
        @param testname    Optional Arg that supplies the testname.
        """
        self.anchor = anchor
        self.url = self._URL_PATTERN % (server, job_string)
        self.reason = reason
        self.retry_count = retry_count
        self.testname = testname
        # Without bug info both bug attributes are simply None.
        self.bug_id, self.bug_count = bug_info if bug_info else (None, None)


    def GenerateBuildbotLink(self):
        """Generate a link formatted to meet buildbot expectations.

        If there is a bug associated with this link, report that;
        otherwise report a link to the job logs. The anchor text is
        followed by the retry count, the bug report count and the failure
        reason, for whichever of those are available.

        @return A link formatted for the buildbot log annotator.
        """
        details = []
        if self.retry_count > 0:
            details.append('retry_count: %d' % self.retry_count)

        if not self.bug_id:
            target = self.url
        else:
            target = self.get_bug_link(self.bug_id)
            if self.bug_count is None:
                details.append('unknown number of reports')
            elif self.bug_count == 1:
                details.append('new report')
            else:
                details.append('%s reports' % self.bug_count)

        if self.reason:
            details.append(self.reason.strip())

        if details:
            text = '%s: %s' % (self.anchor.strip(), ', '.join(details))
        else:
            text = self.anchor.strip()

        return GetBuildbotStepLink(text, target)


    def GenerateTextLink(self):
        """Generate a link to the job's logs, for consumption by a human.

        @return A link formatted for human readability: the anchor
                immediately followed by the log url.
        """
        return '%s%s' % (self.anchor, self.url)


    def GenerateWmatrixRetryLink(self):
        """Generate a link to the wmatrix retry dashboard.

        @return A link formatted for the buildbot log annotator, or None
                when this link has no testname.
        """
        if self.testname:
            return GetBuildbotStepLink(
                    'Flaky test dashboard view for test %s' % self.testname,
                    WMATRIX_RETRY_URL % self.testname)
        return None
392
393
class Timings(object):
    """Timings for important events during a suite.

    All timestamps are datetime.datetime objects.

    @var suite_job_id: the afe job id of the suite job for which
                       we are recording the timing for.
    @var download_start_time: the time the devserver starts staging
                              the build artifacts. Recorded in create_suite_job.
    @var payload_end_time: the time when the artifacts only necessary to start
                           installing images onto DUT's are staged.
                           Recorded in create_suite_job.
    @var artifact_end_time: the remaining artifacts are downloaded after we kick
                            off the reimaging job, at which point we record
                            artifact_end_time. Recorded in dynamic_suite.py.
    @var suite_start_time: the time the suite started.
    @var tests_start_time: the time the first test started running.
    @var tests_end_time: the time the last test finished running.
    """

    def __init__(self, suite_job_id):
        """Create an empty set of timings for one suite job.

        @param suite_job_id: the afe job id of the suite job.
        """
        self.suite_job_id = suite_job_id
        # Timings related to staging artifacts on devserver.
        self.download_start_time = None
        self.payload_end_time = None
        self.artifact_end_time = None

        # The test_start_time, but taken off the view that corresponds to the
        # suite instead of an individual test.
        self.suite_start_time = None

        # Earliest and Latest tests in the set of TestViews passed to us.
        self.tests_start_time = None
        self.tests_end_time = None


    def RecordTiming(self, view):
        """Given a test report view, extract and record pertinent time info.

        get_detailed_test_views() returns a list of entries that provide
        info about the various parts of a suite run.  This method can take
        any one of these entries and look up timestamp info we might want
        and record it.

        If timestamps are unavailable, datetime.datetime.min/max will be used.

        @param view: A TestView object.
        """
        start_candidate = datetime.min
        end_candidate = datetime.max
        if view['test_started_time']:
            start_candidate = time_utils.time_string_to_datetime(
                    view['test_started_time'])
        if view['test_finished_time']:
            end_candidate = time_utils.time_string_to_datetime(
                    view['test_finished_time'])

        # The suite prep view provides the suite start time; every other
        # view contributes to the first-start/last-end window of the tests.
        if view.get_testname() == TestView.SUITE_PREP:
            self.suite_start_time = start_candidate
        else:
            self._UpdateFirstTestStartTime(start_candidate)
            self._UpdateLastTestEndTime(end_candidate)
        # Artifact staging times are read from the keyvals of views that
        # belong to the suite job itself.
        if view['afe_job_id'] == self.suite_job_id and 'job_keyvals' in view:
            keyvals = view['job_keyvals']
            self.download_start_time = time_utils.time_string_to_datetime(
                    keyvals.get(constants.DOWNLOAD_STARTED_TIME),
                    handle_type_error=True)

            self.payload_end_time = time_utils.time_string_to_datetime(
                    keyvals.get(constants.PAYLOAD_FINISHED_TIME),
                    handle_type_error=True)

            self.artifact_end_time = time_utils.time_string_to_datetime(
                    keyvals.get(constants.ARTIFACT_FINISHED_TIME),
                    handle_type_error=True)


    def _UpdateFirstTestStartTime(self, candidate):
        """Update self.tests_start_time, iff candidate is an earlier time.

        @param candidate: a datetime.datetime object.
        """
        if not self.tests_start_time or candidate < self.tests_start_time:
            self.tests_start_time = candidate


    def _UpdateLastTestEndTime(self, candidate):
        """Update self.tests_end_time, iff candidate is a later time.

        @param candidate: a datetime.datetime object.
        """
        if not self.tests_end_time or candidate > self.tests_end_time:
            self.tests_end_time = candidate


    def __str__(self):
        """Render all recorded timings as a multi-line, printable summary."""
        return ('\n'
                'Suite timings:\n'
                'Downloads started at %s\n'
                'Payload downloads ended at %s\n'
                'Suite started at %s\n'
                'Artifact downloads ended (at latest) at %s\n'
                'Testing started at %s\n'
                'Testing ended at %s\n' % (self.download_start_time,
                                           self.payload_end_time,
                                           self.suite_start_time,
                                           self.artifact_end_time,
                                           self.tests_start_time,
                                           self.tests_end_time))


    def SendResultsToStatsd(self, suite, build, board):
        """
        Sends data to statsd.

        1. Makes a data_key of the form: run_suite.$board.$branch.$suite
            eg: stats/gauges/<hostname>/run_suite/<board>/<branch>/<suite>/
        2. Computes timings for several start and end event pairs.
        3. Sends all timing values to statsd.

        Nothing is sent (an error is logged instead) when running on a
        python older than 2.7. A timing is only sent when both of its
        end points have been recorded.

        @param suite: scheduled suite that we want to record the results of.
        @param build: the build that this suite ran on.
            eg: 'lumpy-release/R26-3570.0.0'
        @param board: the board that this suite ran on.
        """
        if sys.version_info < (2, 7):
            logging.error('Sending run_suite perf data to statsd requires '
                          'python 2.7 or greater.')
            return

        # Constructs the key used for logging statsd timing data.
        data_key = utils.get_data_key('run_suite', suite, build, board)

        # Since we don't want to try subtracting corrupted datetime values
        # we catch TypeErrors in time_utils.time_string_to_datetime and insert
        # None instead. This means that even if, say,
        # keyvals.get(constants.ARTIFACT_FINISHED_TIME) returns a corrupt
        # value the member artifact_end_time is set to None.
        if self.download_start_time:
            if self.payload_end_time:
                autotest_stats.Timer(data_key).send('payload_download_time',
                        (self.payload_end_time -
                         self.download_start_time).total_seconds())

            if self.artifact_end_time:
                autotest_stats.Timer(data_key).send('artifact_download_time',
                        (self.artifact_end_time -
                         self.download_start_time).total_seconds())

        if self.tests_end_time:
            if self.suite_start_time:
                autotest_stats.Timer(data_key).send('suite_run_time',
                        (self.tests_end_time -
                         self.suite_start_time).total_seconds())

            if self.tests_start_time:
                autotest_stats.Timer(data_key).send('tests_run_time',
                        (self.tests_end_time -
                         self.tests_start_time).total_seconds())
beeps6f02d192013-03-22 13:15:49 -0700553
beeps6f02d192013-03-22 13:15:49 -0700554
# Hostname from the SERVER section of the global config; used as the
# fallback instance when a pool has no entry of its own.
_DEFAULT_AUTOTEST_INSTANCE = CONFIG.get_config_value('SERVER', 'hostname',
                                                     type=str)
557
558
def instance_for_pool(pool_name):
    """
    Look up which server should service a suite for the specified pool.

    The pool name is looked up in the POOL_INSTANCE_SHARDING section of the
    global config, with _DEFAULT_AUTOTEST_INSTANCE as the default.

    @param pool_name: The pool (without 'pool:') to schedule the suite against.
    @return: The correct host that should be used to service this suite run.
    """
    sharded_instance = CONFIG.get_config_value(
            'POOL_INSTANCE_SHARDING', pool_name,
            default=_DEFAULT_AUTOTEST_INSTANCE)
    return sharded_instance
570
571
class TestView(object):
    """Represents a tko test view and provides a set of helper functions.

    A TestView wraps a dictionary representing a tko test view together
    with the afe job that kicked off the test. Items of the wrapped
    dictionary can be read with [] and iterating a TestView iterates the
    keys of the wrapped dictionary.

    @var SUITE_PREP: The name shown for the suite job's SERVER_JOB view.
    @var INFRA_TESTS: Names of the tests that only lab infra is
                      concerned with.

    """


    SUITE_PREP = 'Suite prep'
    INFRA_TESTS = ['provision']
    # Name prefixes of views that describe a job rather than an actual test.
    _JOB_VIEW_PREFIXES = ('SERVER_JOB', 'CLIENT_JOB')


    def __init__(self, view, afe_job, suite_name, build):
        """Init a TestView object representing a tko test view.

        @param view: A dictionary representing a tko test view.
        @param afe_job: An instance of frontend.afe.models.Job
                        representing the job that kicked off the test.
        @param suite_name: The name of the suite
                           that the test belongs to.
        @param build: The build for which the test is run.
        """
        self.view = view
        self.afe_job = afe_job
        self.suite_name = suite_name
        self.build = build
        self.is_suite_view = afe_job.parent_job is None
        # This is the test name that will be shown in the output.
        # It is computed lazily and cached by get_testname().
        self.testname = None

        # The case that a job was aborted before it got a chance to run
        # usually indicates suite has timed out (unless aborted by user).
        # In this case, the abort reason will be None.
        # Update the reason with proper information.
        if (self.is_relevant_suite_view() and
                self.get_testname() != self.SUITE_PREP and
                self.view['status'] == 'ABORT' and
                not self.view['reason']):
            self.view['reason'] = 'Timed out, did not run.'


    def __getitem__(self, key):
        """Overload __getitem__ so that we can still use []

        @param key: A key of the tko test view.

        @returns: The value of an attribute in the view.

        """
        return self.view[key]


    def __iter__(self):
        """Overload __iter__ so that it supports 'in' operator."""
        return iter(self.view)


    def _get_job_qualified_test_name(self):
        """Get view['test_name'], qualified by the job name if needed.

        @returns: '<job_name>_<test_name>' if the view is for a
                  SERVER_JOB or CLIENT_JOB, view['test_name'] otherwise.

        """
        if self.is_test():
            return self.view['test_name']
        # Append job name as a prefix for SERVER_JOB and CLIENT_JOB
        return '%s_%s' % (self.view['job_name'], self.view['test_name'])


    def get_testname(self):
        """Get test name that should be shown in the output.

        Formalize the test_name we got from the test view.

        Remove 'build/suite' prefix if any. And prepend the 'experimental'
        prefix for experimental tests if their names do not start with it.
        The result is cached in self.testname.

        If one runs a test in control file via the following code,
            job.runtest('my_Test', tag='tag')
        for most of the cases, view['test_name'] would look like 'my_Test.tag'.
        If this is the case, this method will just return the original
        test name, i.e. 'my_Test.tag'.

        There are four special cases.
        1) A test view is for the suite job's SERVER_JOB.
           In this case, this method will return 'Suite prep'.

        2) A test view is of a child job and for a SERVER_JOB or CLIENT_JOB.
           In this case, we will take the job name, remove the build/suite
           prefix from the job name, and append the rest to 'SERVER_JOB'
           or 'CLIENT_JOB' as a prefix. So the names returned by this
           method will look like:
             'experimental_Telemetry Smoothness Measurement_SERVER_JOB'
             'experimental_dummy_Pass_SERVER_JOB'
             'dummy_Fail_SERVER_JOB'

        3) A test view is of a suite job and its status is ABORT.
           In this case, the view['test_name'] is the child job's name.
           If it is an experimental test, 'experimental' will be part
           of the name. For instance,
             'lumpy-release/R35-5712.0.0/perf_v2/
                   experimental_Telemetry Smoothness Measurement'
             'lumpy-release/R35-5712.0.0/dummy/experimental_dummy_Pass'
             'lumpy-release/R35-5712.0.0/dummy/dummy_Fail'
           The above names will be converted to the following:
             'experimental_Telemetry Smoothness Measurement'
             'experimental_dummy_Pass'
             'dummy_Fail'

        4) A test view is of a suite job and its status is TEST_NA.
           In this case, the view['test_name'] is the NAME field of the control
           file. If it is an experimental test, 'experimental' will be part of
           the name. For instance,
             'experimental_Telemetry Smoothness Measurement'
             'experimental_dummy_Pass'
             'dummy_Fail'
           This method will not modify these names.

        @returns: Test name after normalization.

        """
        if self.testname is not None:
            return self.testname

        if (self.is_suite_view and
                self.view['test_name'].startswith('SERVER_JOB')):
            # Rename suite job's SERVER_JOB to 'Suite prep'.
            self.testname = self.SUITE_PREP
            return self.testname

        testname = self._get_job_qualified_test_name()
        experimental = self.is_experimental()
        # Remove the build and suite name from testname if any.
        testname = tools.get_test_name(
                self.build, self.suite_name, testname)
        # If an experimental test was aborted, testname
        # would include the 'experimental' prefix already.
        prefix = constants.EXPERIMENTAL_PREFIX if (
                experimental and not
                testname.startswith(constants.EXPERIMENTAL_PREFIX)) else ''
        self.testname = prefix + testname
        return self.testname


    def is_relevant_suite_view(self):
        """Checks whether this is a suite view we should care about.

        A suite view is relevant if it is the suite prep view, or if it
        belongs to the suite job, is not a CLIENT_JOB view and has no subdir.

        @returns: True if it is relevant. False otherwise.
        """
        return (self.get_testname() == self.SUITE_PREP or
                (self.is_suite_view and
                 not self.view['test_name'].startswith('CLIENT_JOB') and
                 not self.view['subdir']))


    def is_test(self):
        """Return whether the view is for an actual test.

        @returns True if the view is for an actual test.
                 False if the view is for SERVER_JOB or CLIENT_JOB.

        """
        return not self.view['test_name'].startswith(self._JOB_VIEW_PREFIXES)


    def is_retry(self):
        """Check whether the view is for a retry.

        @returns: True, if the view is for a retry; False otherwise.

        """
        return self.view['job_keyvals'].get('retry_original_job_id') is not None


    def is_experimental(self):
        """Check whether a test view is for an experimental test.

        A view is experimental if its job keyval 'experimental' is 'True',
        or if its test name, with the build/suite prefix removed, starts
        with 'experimental'.

        @returns: True if it is for an experimental test, False otherwise.

        """
        return (self.view['job_keyvals'].get('experimental') == 'True' or
                tools.get_test_name(self.build, self.suite_name,
                        self.view['test_name']).startswith('experimental'))


    def hit_timeout(self):
        """Check whether the corresponding job has hit its own timeout.

        Note this method should not be called for those test views
        that belongs to a suite job and are determined as irrelevant
        by is_relevant_suite_view. This is because they are associated
        to the suite job, whose job start/finished time make no sense
        to an irrelevant test view.

        @returns: True if the corresponding afe job has hit timeout.
                  False otherwise.
        """
        if (self.is_relevant_suite_view() and
                self.get_testname() != self.SUITE_PREP):
            # Any relevant suite test view except SUITE_PREP
            # did not hit its own timeout because it was not ever run.
            return False
        start = (datetime.strptime(
                self.view['job_started_time'], time_utils.TIME_FMT)
                if self.view['job_started_time'] else None)
        end = (datetime.strptime(
                self.view['job_finished_time'], time_utils.TIME_FMT)
                if self.view['job_finished_time'] else None)
        if not start or not end:
            # Without both timestamps the runtime can not be computed.
            return False
        runtime_mins = (end - start).total_seconds()/60.0
        return runtime_mins > self.afe_job.max_runtime_mins


    def is_aborted(self):
        """Check if the view was aborted.

        For suite prep and child job test views, we check job keyval
        'aborted_by' and test status.

        For relevant suite job test views, we only check test status
        because the suite job keyval won't make sense to individual
        test views.

        @returns: True if the test was aborted, False otherwise.

        """
        if (self.is_relevant_suite_view() and
                self.get_testname() != self.SUITE_PREP):
            return self.view['status'] == 'ABORT'
        return (bool(self.view['job_keyvals'].get('aborted_by')) and
                self.view['status'] in ['ABORT', 'RUNNING'])


    def is_in_fail_status(self):
        """Check if the given test's status corresponds to a failure.

        @returns: True if the test's status is FAIL, ERROR or ABORT.
                  False otherwise.

        """
        # All the statuses tests can have when they fail.
        return self.view['status'] in ['FAIL', 'ERROR', 'ABORT']


    def is_infra_test(self):
        """Check whether this is a test that only lab infra is concerned.

        @returns: True if only lab infra is concerned, False otherwise.

        """
        return self.get_testname() in self.INFRA_TESTS


    def get_buildbot_link_reason(self):
        """Generate the buildbot link reason for the test.

        @returns: A string representing the reason, '<status>: <reason>'
                  if the view has a reason, or just the status otherwise.

        """
        return ('%s: %s' % (self.view['status'], self.view['reason'])
                if self.view['reason'] else self.view['status'])


    def get_job_id_owner_str(self):
        """Generate the job_id_owner string for a test.

        The owner part is the user running this script.

        @returns: A string which looks like 135036-username

        """
        return '%s-%s' % (self.view['afe_job_id'], getpass.getuser())


    def get_bug_info(self, suite_job_keyvals):
        """Get the bug info from suite_job_keyvals.

        If a bug has been filed for the test, its bug info (bug id and counts)
        will be stored in the suite job's keyvals. This method attempts to
        retrieve bug info of the test from |suite_job_keyvals|. It will return
        None if no bug info is found. No need to check bug info if the view is
        SUITE_PREP.

        @param suite_job_keyvals: The job keyval dictionary of the suite job.
                All the bug info about child jobs are stored in
                suite job's keyvals.

        @returns: None if there is no bug info, or a pair with the
                  id of the bug, and the count of the number of
                  times the bug has been seen.

        """
        if self.get_testname() == self.SUITE_PREP:
            return None
        return tools.get_test_failure_bug_info(
                suite_job_keyvals, self.view['afe_job_id'],
                self._get_job_qualified_test_name())


    def should_display_buildbot_link(self):
        """Check whether a buildbot link should show for this view.

        For suite prep view, show buildbot link if it fails.
        For normal test view,
            show buildbot link if it is a retry
            show buildbot link if it hits its own timeout.
            show buildbot link if it fails. This doesn't
            include the case where it was aborted but has
            not hit its own timeout (most likely it was aborted because
            suite has timed out).

        @returns: True if we should show the buildbot link.
                  False otherwise.
        """
        is_bad_status = self.view['status'] not in ('GOOD', 'TEST_NA')
        if self.get_testname() == self.SUITE_PREP:
            return is_bad_status
        if self.is_retry():
            return True
        if is_bad_status:
            return not self.is_aborted() or self.hit_timeout()
        # A passing (or TEST_NA) test that is not a retry needs no link.
        return False
Fang Dengaeab6172014-05-07 17:17:04 -0700893
894
class ResultCollector(object):
    """Collect test results of a suite.

    Once a suite job has finished, use this class to collect test results.
    `run` is the core method that is to be called first. Then the caller
    could retrieve information like return code, return message, is_aborted,
    and timings by accessing the collector's public attributes. And output
    the test results and links by calling the 'output_*' methods.

    Here is an overview of what `run` method does.

    1) Collect the suite job's results from tko_test_view_2.
    For the suite job, we only pull test views without a 'subdir'.
    A NULL subdir indicates that the test was _not_ executed. This could be
    that no child job was scheduled for this test or the child job got
    aborted before starts running.
    (Note 'SERVER_JOB'/'CLIENT_JOB' are handled specially)

    2) Collect the child jobs' results from tko_test_view_2.
    For child jobs, we pull all the test views associated with them.
    (Note 'SERVER_JOB'/'CLIENT_JOB' are handled specially)

    3) Generate web and buildbot links.
    4) Compute timings of the suite run.
    5) Compute the return code based on test results.

    @var _instance_server: The hostname of the server that is used
                           to service the suite.
    @var _afe: The afe rpc client.
    @var _tko: The tko rpc client.
    @var _build: The build for which the suite is run,
                 e.g. 'lumpy-release/R35-5712.0.0'
    @var _board: The target board for which the suite is run,
                 e.g., 'lumpy', 'link'.
    @var _suite_name: The suite name, e.g. 'bvt', 'dummy'.
    @var _suite_job_id: The job id of the suite for which we are going to
                        collect results.
    @var _suite_views: A list of TestView objects, representing relevant
                       test views of the suite job.
    @var _child_views: A list of TestView objects, representing test views
                       of the child jobs.
    @var _test_views: A list of TestView objects, representing all test views
                      from _suite_views and _child_views.
    @var _retry_counts: A dictionary that maps test_idx to retry count,
                        only for retry counts greater than 0.
    @var _web_links: A list of web links pointing to the results of jobs.
    @var _buildbot_links: A list of buildbot links for non-passing tests.
    @var _max_testname_width: Max width of all test names.
    @var _num_child_jobs: The number of child jobs of the suite job.
    @var return_code: The exit code that should be returned by run_suite.
    @var return_message: Any message that should be displayed to explain
                         the return code.
    @var is_aborted: Whether the suite was aborted or not.
                     True, False or None (aborting status is unknown yet)
    @var timings: A Timing object that records the suite's timings.

    """


    def __init__(self, instance_server, afe, tko, build, board,
                 suite_name, suite_job_id):
        self._instance_server = instance_server
        self._afe = afe
        self._tko = tko
        self._build = build
        self._board = board
        self._suite_name = suite_name
        self._suite_job_id = suite_job_id
        self._suite_views = []
        self._child_views = []
        self._test_views = []
        self._retry_counts = {}
        self._web_links = []
        self._buildbot_links = []
        self._max_testname_width = 0
        self._num_child_jobs = 0
        self.return_code = None
        self.return_message = ''
        self.is_aborted = None
        self.timings = None


    def _fetch_relevant_test_views_of_suite(self):
        """Fetch relevant test views of the suite job.

        For the suite job, there will be a test view for SERVER_JOB, and views
        for results of its child jobs. For example, assume we've created
        a suite job (afe_job_id: 40) that runs dummy_Pass, dummy_Fail,
        dummy_Pass.bluetooth. Assume dummy_Pass was aborted before running while
        dummy_Pass.bluetooth got TEST_NA as no duts have bluetooth.
        So the suite job's test views would look like
        _____________________________________________________________________
        test_idx| job_idx|test_name           |subdir      |afe_job_id|status
        10      | 1000   |SERVER_JOB          |----        |40        |GOOD
        11      | 1000   |dummy_Pass          |NULL        |40        |ABORT
        12      | 1000   |dummy_Fail.Fail     |41-owner/...|40        |FAIL
        13      | 1000   |dummy_Fail.Error    |42-owner/...|40        |ERROR
        14      | 1000   |dummy_Pass.bluetooth|NULL        |40        |TEST_NA

        For a suite job, we only care about
        a) The test view for the suite job's SERVER_JOB
        b) The test views for real tests without a subdir. A NULL subdir
           indicates that a test didn't get executed.
        So, for the above example, we only keep test views whose test_idxs
        are 10, 11, 14.

        @returns: A list of TestView objects, representing relevant
                  test views of the suite job.

        """
        suite_job = self._afe.get_jobs(id=self._suite_job_id)[0]
        views = self._tko.run(call='get_detailed_test_views',
                              afe_job_id=self._suite_job_id)
        relevant_views = []
        for v in views:
            v = TestView(v, suite_job, self._suite_name, self._build)
            if v.is_relevant_suite_view():
                relevant_views.append(v)
        return relevant_views


    def _compute_retry_count(self, view):
        """Return how many times the test has been retried.

        Follows the chain of 'retry_original_job_id' job keyvals, fetching
        the test views of each original job through the tko rpc client.

        @param view: A TestView instance.
        @returns: An int value indicating the retry count.

        """
        old_job = view['job_keyvals'].get('retry_original_job_id')
        count = 0
        # Remember visited jobs so that a malformed (cyclic) chain of
        # keyvals can not keep this loop running forever.
        seen_jobs = set()
        while old_job and old_job not in seen_jobs:
            seen_jobs.add(old_job)
            count += 1
            views = self._tko.run(
                call='get_detailed_test_views', afe_job_id=old_job)
            old_job = (views[0]['job_keyvals'].get('retry_original_job_id')
                       if views else None)
        return count


    def _fetch_test_views_of_child_jobs(self):
        """Fetch test views of child jobs.

        As a side effect, records the number of child jobs in
        self._num_child_jobs.

        @returns: A tuple (child_views, retry_counts)
                  child_views is list of TestView objects, representing
                  all valid views. retry_counts is a dictionary that maps
                  test_idx to retry counts. It only stores retry
                  counts that are greater than 0.

        """
        child_views = []
        retry_counts = {}
        child_jobs = self._afe.get_jobs(parent_job_id=self._suite_job_id)
        if child_jobs:
            self._num_child_jobs = len(child_jobs)
        for job in child_jobs:
            views = [TestView(v, job, self._suite_name, self._build)
                     for v in self._tko.run(
                         call='get_detailed_test_views', afe_job_id=job.id,
                         invalid=0)]
            contains_test_failure = any(
                    v.is_test() and v['status'] != 'GOOD' for v in views)
            for v in views:
                if (v.is_test() or
                        v['status'] != 'GOOD' and not contains_test_failure):
                    # For normal test view, just keep it.
                    # For SERVER_JOB or CLIENT_JOB, only keep it
                    # if it fails and no other test failure.
                    child_views.append(v)
                    retry_count = self._compute_retry_count(v)
                    if retry_count > 0:
                        retry_counts[v['test_idx']] = retry_count
        return child_views, retry_counts


    def _generate_web_and_buildbot_links(self):
        """Generate web links and buildbot links.

        A web link is generated for every test view. The same link is also
        recorded as a buildbot link if the view says it should be displayed.
        """
        # TODO(fdeng): If a job was aborted before it reaches Running
        #              state, we read the test view from the suite job
        #              and thus this method generates a link pointing to the
        #              suite job's page for the aborted job. Need a fix.
        self._web_links = []
        self._buildbot_links = []
        # Bug info are stored in the suite job's keyvals.
        # There may be child views but no relevant suite view; fall back
        # to empty keyvals in that case instead of raising IndexError.
        # NOTE(review): assumes tools.get_test_failure_bug_info copes with
        # an empty keyval dictionary -- confirm.
        suite_job_keyvals = (self._suite_views[0]['job_keyvals']
                             if self._suite_views else {})
        for v in self._test_views:
            retry_count = self._retry_counts.get(v['test_idx'], 0)
            bug_info = v.get_bug_info(suite_job_keyvals)
            job_id_owner = v.get_job_id_owner_str()
            link = LogLink(
                    anchor=v.get_testname().ljust(
                            self._max_testname_width),
                    server=self._instance_server,
                    job_string=job_id_owner,
                    bug_info=bug_info, retry_count=retry_count,
                    testname=v.get_testname())
            self._web_links.append(link)

            if v.should_display_buildbot_link():
                link.reason = v.get_buildbot_link_reason()
                self._buildbot_links.append(link)


    def _record_timings(self):
        """Record suite timings."""
        self.timings = Timings(self._suite_job_id)
        for v in self._test_views:
            self.timings.RecordTiming(v)


    def _get_return_msg(self, code, tests_passed_after_retry):
        """Return the proper message for a given return code.

        @param code: An enum value of RETURN_CODES
        @param tests_passed_after_retry: True/False, indicating
            whether there are test(s) that have passed after retry.

        @returns: A string, representing the message.

        """
        if code == RETURN_CODES.INFRA_FAILURE:
            return 'Suite job failed or provisioning failed.'
        elif code == RETURN_CODES.SUITE_TIMEOUT:
            return ('Some test(s) was aborted before running,'
                    ' suite must have timed out.')
        elif code == RETURN_CODES.WARNING:
            if tests_passed_after_retry:
                return 'Some test(s) passed after retry.'
            else:
                return 'Some test(s) raised a warning.'
        elif code == RETURN_CODES.ERROR:
            return 'Some test(s) failed.'
        else:
            return ''


    def _compute_return_code(self):
        """Compute the exit code based on test results.

        Each non-experimental test view is mapped to a return code and
        the worst one (as decided by get_worse_code) is stored in
        self.return_code, with the matching message in self.return_message.
        """
        code = RETURN_CODES.OK
        tests_passed_after_retry = False

        for v in self._test_views:
            # The order of checking each case is important.
            if v.is_experimental():
                continue
            if v.get_testname() == TestView.SUITE_PREP:
                if v.is_aborted() and v.hit_timeout():
                    current_code = RETURN_CODES.SUITE_TIMEOUT
                elif v.is_in_fail_status():
                    current_code = RETURN_CODES.INFRA_FAILURE
                elif v['status'] == 'WARN':
                    current_code = RETURN_CODES.WARNING
                else:
                    current_code = RETURN_CODES.OK
            else:
                if v.is_aborted() and v.is_relevant_suite_view():
                    # The test was aborted before started
                    # This guarantees that the suite has timed out.
                    current_code = RETURN_CODES.SUITE_TIMEOUT
                elif v.is_aborted() and not v.hit_timeout():
                    # The test was aborted, but
                    # not due to a timeout. This is most likely
                    # because the suite has timed out, but may
                    # also because it was aborted by the user.
                    # Since suite timing out is determined by checking
                    # the suite prep view, we simply ignore this view here.
                    current_code = RETURN_CODES.OK
                elif v.is_in_fail_status():
                    # The test job failed.
                    if v.is_infra_test():
                        current_code = RETURN_CODES.INFRA_FAILURE
                    else:
                        current_code = RETURN_CODES.ERROR
                elif v['status'] == 'WARN':
                    # The test/suite job raised a warning.
                    current_code = RETURN_CODES.WARNING
                elif v.is_retry():
                    # The test is a passing retry.
                    current_code = RETURN_CODES.WARNING
                    tests_passed_after_retry = True
                else:
                    current_code = RETURN_CODES.OK
            code = get_worse_code(code, current_code)

        self.return_code = code
        self.return_message = self._get_return_msg(
                code, tests_passed_after_retry)


    def output_results(self):
        """Output test results, timings and web links."""
        # Output test results
        for v in self._test_views:
            display_name = v.get_testname().ljust(self._max_testname_width)
            logging.info('%s%s', display_name,
                         get_pretty_status(v['status']))
            if v['status'] != 'GOOD':
                logging.info('%s  %s: %s', display_name, v['status'],
                             v['reason'])
            if v.is_retry():
                retry_count = self._retry_counts.get(v['test_idx'], 0)
                logging.info('%s  retry_count: %s',
                             display_name, retry_count)
        # Output suite timings
        logging.info(self.timings)
        # Output links to test logs
        logging.info('\nLinks to test logs:')
        for link in self._web_links:
            logging.info(link.GenerateTextLink())
        logging.info('\n')


    def output_buildbot_links(self):
        """Output buildbot links.

        For each buildbot link, the wmatrix retry link is also output
        if there is one.
        """
        for link in self._buildbot_links:
            logging.info(link.GenerateBuildbotLink())
            wmatrix_link = link.GenerateWmatrixRetryLink()
            if wmatrix_link:
                logging.info(wmatrix_link)


    def run(self):
        """Collect test results.

        This method goes through the following steps:
            Fetch relevant test views of the suite job.
            Fetch test views of child jobs
            Check whether the suite was aborted.
            Generate links.
            Calculate suite timings.
            Compute return code based on the test result.

        If no test view is found at all, the return code is set to
        INFRA_FAILURE and the remaining steps are skipped, which leaves
        is_aborted and timings as None.

        """
        self._suite_views = self._fetch_relevant_test_views_of_suite()
        self._child_views, self._retry_counts = (
                self._fetch_test_views_of_child_jobs())
        self._test_views = self._suite_views + self._child_views
        # For hostless job in Starting status, there is no test view associated.
        # This can happen when a suite job in Starting status is aborted. When
        # the scheduler hits some limit, e.g., max_hostless_jobs_per_drone,
        # max_jobs_started_per_cycle, a suite job can stays in Starting status.
        if not self._test_views:
            self.return_code = RETURN_CODES.INFRA_FAILURE
            self.return_message = 'No test view was found.'
            return
        self.is_aborted = any(view['job_keyvals'].get('aborted_by')
                              for view in self._suite_views)
        # Leave a gap of 3 characters after the longest test name.
        self._max_testname_width = max(
                len(v.get_testname()) for v in self._test_views) + 3
        self._generate_web_and_buildbot_links()
        self._record_timings()
        self._compute_return_code()


    def gather_timing_stats(self):
        """Collect timing related statistics.

        Note this reads self.timings, which is only set once `run` got
        past the 'No test view was found.' check.
        """
        # Send timings to statsd.
        self.timings.SendResultsToStatsd(
                self._suite_name, self._build, self._board)

        # Record suite runtime in metadata db.
        # Some failure modes can leave times unassigned, report sentinel value
        # in that case.
        runtime_in_secs = -1
        if (self.timings.tests_end_time is not None and
                self.timings.suite_start_time is not None):
            runtime_in_secs = (self.timings.tests_end_time -
                    self.timings.suite_start_time).total_seconds()

        job_overhead.record_suite_runtime(self._suite_job_id, self._suite_name,
                self._board, self._build, self._num_child_jobs, runtime_in_secs)
1262
1263
@retry.retry(error.StageControlFileFailure, timeout_min=10)
def create_suite(afe, options):
    """Schedule a suite job through the afe, with retries.

    The boolean-like options (no_wait, file_bugs, retry,
    offload_failures_only) arrive as the strings 'True'/'False' and are
    converted here. options.priority may either be an integer value or
    the name of a priority level.

    @param afe: The afe object to insert the new suite job into.
    @param options: The options to use in creating the suite.

    @return: The afe_job_id of the new suite job.
    """
    should_wait = (options.no_wait == 'False')
    should_file_bugs = (options.file_bugs == 'True')
    should_retry = (options.retry == 'True')
    only_offload_failures = (options.offload_failures_only == 'True')

    # A priority that is not a plain integer is looked up by name.
    try:
        priority_value = int(options.priority)
    except ValueError:
        try:
            priority_value = priorities.Priority.get_value(options.priority)
        except AttributeError:
            known_levels = ', '.join(priorities.Priority.names)
            print('Unknown priority level %s. Try one of %s.' % (
                  options.priority, known_levels))
            raise

    submit_time = diagnosis_utils.JobTimer.format_time(datetime.now())
    logging.info('%s Submitted create_suite_job rpc', submit_time)
    return afe.run(
            'create_suite_job',
            name=options.name,
            board=options.board,
            build=options.build,
            check_hosts=should_wait,
            pool=options.pool,
            num=options.num,
            file_bugs=should_file_bugs,
            priority=priority_value,
            suite_args=options.suite_args,
            wait_for_results=should_wait,
            timeout_mins=options.timeout_mins,
            max_runtime_mins=options.max_runtime_mins,
            job_retry=should_retry,
            max_retries=options.max_retries,
            suite_min_duts=options.suite_min_duts,
            offload_failures_only=only_offload_failures)
Prashanth B6285f6a2014-05-08 18:01:27 -07001300
1301
def main_without_exception_handling():
    """
    Entry point for run_suite script without exception handling.

    Parses and verifies the command line options, sets up logging, then
    either looks up an existing suite job (when options.mock_job_id is
    given) or creates a new suite job. Unless told to return right after
    creation or not to wait, it polls the AFE until the suite job finishes,
    collects and outputs the results, and derives the return code.

    Returns one of the RETURN_CODES values:
      * INVALID_OPTIONS if option verification fails, or if an
        AttributeError is raised while creating the suite.
      * INFRA_FAILURE if suite creation raises a dynamic suite or RPC
        exception.
      * OK if --create_and_return or --no_wait was specified.
      * Otherwise the ResultCollector's return code, possibly combined
        with SUITE_TIMEOUT through get_worse_code().

    Raises utils.TestLabException if the job named by options.mock_job_id
    cannot be retrieved, and re-raises diagnosis_utils.NotEnoughDutsError
    from the DUT availability check. Other exceptions from the helpers
    called here are not handled and propagate to the caller.
    """
    parser, options, args = parse_options()
    if not verify_options_and_args(options, args):
        parser.print_help()
        return RETURN_CODES.INVALID_OPTIONS

    # If indicate to use the new style suite control file, convert the args
    if options.use_suite_attr:
        options = change_options_for_suite_attr(options)

    log_name = 'run_suite-default.log'
    if options.build:
        # convert build name from containing / to containing only _
        log_name = 'run_suite-%s.log' % options.build.replace('/', '_')
        log_dir = os.path.join(common.autotest_dir, 'logs')
        if os.path.exists(log_dir):
            log_name = os.path.join(log_dir, log_name)

    utils.setup_logging(logfile=log_name)

    if not options.bypass_labstatus:
        utils.check_lab_status(options.build)
    instance_server = (options.web if options.web else
                       instance_for_pool(options.pool))
    afe = frontend_wrappers.RetryingAFE(server=instance_server,
                                        timeout_min=options.afe_timeout_mins,
                                        delay_sec=options.delay_sec)
    logging.info('Autotest instance: %s', instance_server)

    rpc_helper = diagnosis_utils.RPCHelper(afe)
    is_real_time = True
    if options.mock_job_id:
        job_id = int(options.mock_job_id)
        # A job that has already finished is not being watched in real
        # time; an unfinished one is looked up again without the filter.
        existing_job = afe.get_jobs(id=job_id, finished=True)
        if existing_job:
            is_real_time = False
        else:
            existing_job = afe.get_jobs(id=job_id)
        if existing_job:
            job_created_on = time_utils.date_string_to_epoch_time(
                    existing_job[0].created_on)
        else:
            raise utils.TestLabException('Failed to retrieve job: %d' % job_id)
    else:
        try:
            rpc_helper.check_dut_availability(options.board, options.pool,
                                              options.minimum_duts)
            job_id = create_suite(afe, options)
            job_created_on = time.time()
        except diagnosis_utils.NotEnoughDutsError:
            logging.info(GetBuildbotStepLink(
                    'Pool Health Bug', LogLink.get_bug_link(rpc_helper.bug)))
            raise
        except (error.CrosDynamicSuiteException,
                error.RPCException, proxy.JSONRPCException) as e:
            logging.warning('Error Message: %s', e)
            return RETURN_CODES.INFRA_FAILURE
        except AttributeError as e:
            # Log the cause instead of silently mapping it to a return
            # code, so the offending attribute can be found in the log.
            logging.warning('AttributeError while creating suite, '
                            'treating as invalid options: %s', e)
            return RETURN_CODES.INVALID_OPTIONS

    job_timer = diagnosis_utils.JobTimer(
            job_created_on, float(options.timeout_mins))
    job_url = reporting_utils.link_job(job_id,
                                       instance_server=instance_server)
    logging.info('%s Created suite job: %s',
                 job_timer.format_time(job_timer.job_created_time),
                 job_url)
    # TODO(akeshet): Move this link-printing to chromite.
    logging.info(GetBuildbotStepLink('Suite created', job_url))

    if options.create_and_return:
        logging.info('--create_and_return was specified, terminating now.')
        return RETURN_CODES.OK

    TKO = frontend_wrappers.RetryingTKO(server=instance_server,
                                        timeout_min=options.afe_timeout_mins,
                                        delay_sec=options.delay_sec)
    code = RETURN_CODES.OK
    # options.no_wait is compared against the string 'False', not a bool.
    wait = options.no_wait == 'False'
    if wait:
        while not afe.get_jobs(id=job_id, finished=True):
            # Note that this call logs output, preventing buildbot's
            # 9000 second silent timeout from kicking in. Let there be no
            # doubt, this is a hack. The timeout is from upstream buildbot and
            # this is the easiest work around.
            if job_timer.first_past_halftime():
                rpc_helper.diagnose_job(job_id, instance_server)
            if job_timer.debug_output_timer.poll():
                logging.info('The suite job has another %s till timeout.',
                             job_timer.timeout_hours - job_timer.elapsed_time())
            time.sleep(10)
        # For most cases, ResultCollector should be able to determine whether
        # a suite has timed out by checking information in the test view.
        # However, occasionally tko parser may fail on parsing the
        # job_finished time from the job's keyval file. So we add another
        # layer of timeout check in run_suite. We do the check right after
        # the suite finishes to make it as accurate as possible.
        # There is a minor race condition here where we might have aborted
        # for some reason other than a timeout, and the job_timer thinks
        # it's a timeout because of the jitter in waiting for results.
        # The consequence would be that run_suite exits with code
        # SUITE_TIMEOUT while it should have returned INFRA_FAILURE
        # instead, which should happen very rarely.
        # Note the timeout will have no sense when using -m option.
        is_suite_timeout = job_timer.is_suite_timeout()

        # Start collecting test results.
        collector = ResultCollector(instance_server=instance_server,
                                    afe=afe, tko=TKO, build=options.build,
                                    board=options.board,
                                    suite_name=options.name,
                                    suite_job_id=job_id)
        collector.run()
        # Output test results, timings, web links.
        collector.output_results()
        code = collector.return_code
        return_message = collector.return_message
        if is_real_time:
            # Do not record stats if the suite was aborted (either by a user
            # or through the golo rpc).
            # Also do not record stats if is_aborted is None, indicating
            # aborting status is unknown yet.
            if collector.is_aborted == False:
                collector.gather_timing_stats()

            if collector.is_aborted == True and is_suite_timeout:
                # There are two possible cases when a suite times out.
                # 1. the suite job was aborted due to timing out
                # 2. the suite job succeeded, but some child jobs
                #    were already aborted before the suite job exited.
                # The case 2 was handled by ResultCollector,
                # here we handle case 1.
                old_code = code
                code = get_worse_code(
                        code, RETURN_CODES.SUITE_TIMEOUT)
                if old_code != code:
                    return_message = 'Suite job timed out.'
                    logging.info('Upgrade return code from %s to %s '
                                 'because suite job has timed out.',
                                 RETURN_CODES.get_string(old_code),
                                 RETURN_CODES.get_string(code))
            if is_suite_timeout:
                logging.info('\nAttempting to diagnose pool: %s', options.pool)
                try:
                    # Add some jitter to make up for any latency in
                    # aborting the suite or checking for results.
                    cutoff = (job_timer.timeout_hours +
                              datetime_base.timedelta(hours=0.3))
                    rpc_helper.diagnose_pool(
                            options.board, options.pool, cutoff)
                except proxy.JSONRPCException as e:
                    # Diagnosis is best effort; report why it failed but
                    # do not let it change the suite's return code.
                    logging.warning('Unable to diagnose suite abort: %s', e)

        # And output return message.
        if return_message:
            logging.info('Reason: %s', return_message)

        logging.info('\nOutput below this line is for buildbot consumption:')
        collector.output_buildbot_links()
    else:
        logging.info('Created suite job: %r', job_id)
        link = LogLink(options.name, instance_server,
                       '%s-%s' % (job_id, getpass.getuser()))
        logging.info(link.GenerateBuildbotLink())
        logging.info('--no_wait specified; Exiting.')
    return code
1471
Fang Dengdd20e452014-04-07 15:39:47 -07001472
def main():
    """Entry point.

    Runs main_without_exception_handling() and maps any exception that
    escapes it to a return code, so the caller always gets a RETURN_CODES
    value back. The final status is logged and counted in autotest_stats
    before returning.
    """
    try:
        code = main_without_exception_handling()
    except diagnosis_utils.BoardNotAvailableError as e:
        logging.warning('Can not run suite: %s', e)
        code = RETURN_CODES.BOARD_NOT_AVAILABLE
    except utils.TestLabException as e:
        logging.warning('Can not run suite: %s', e)
        code = RETURN_CODES.INFRA_FAILURE
    except Exception as e:
        # Anything else is unexpected: record the traceback and report it
        # as an infrastructure failure.
        logging.exception('Unhandled run_suite exception: %s', e)
        code = RETURN_CODES.INFRA_FAILURE

    status = RETURN_CODES.get_string(code)
    logging.info('Will return from run_suite with status: %s', status)
    autotest_stats.Counter('run_suite.%s' % status).increment()
    return code
Fang Dengfb4a9492014-09-18 17:52:06 -07001493
1494
if __name__ == '__main__':
    # Exit the process with the return code produced by main().
    raise SystemExit(main())