blob: c9b6d5a71a21f45980ad6914116e1a4e5f7e3647 [file] [log] [blame]
Chris Masone24b80f12012-02-14 14:18:01 -08001#!/usr/bin/python
2#
3# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
4# Use of this source code is governed by a BSD-style license that can be
5# found in the LICENSE file.
6
Fang Deng5a43be62014-05-07 17:17:04 -07007
Chris Masone24b80f12012-02-14 14:18:01 -08008"""Tool for running suites of tests and waiting for completion.
9
Fang Deng5a43be62014-05-07 17:17:04 -070010The desired test suite will be scheduled with autotest. By default,
Chris Masone24b80f12012-02-14 14:18:01 -080011this tool will block until the job is complete, printing a summary
12at the end. Error conditions result in exceptions.
13
This is intended for use only with Chrome OS test suites that leverage the
15dynamic suite infrastructure in server/cros/dynamic_suite.py.
Fang Deng5a43be62014-05-07 17:17:04 -070016
17This script exits with one of the following codes:
180 - OK: Suite finished successfully
191 - ERROR: Test(s) failed, or hits its own timeout
Fang Dengaeab6172014-05-07 17:17:04 -0700202 - WARNING: Test(s) raised a warning or passed on retry, none failed/timed out.
Fang Deng5a43be62014-05-07 17:17:04 -0700213 - INFRA_FAILURE: Infrastructure related issues, e.g.
22 * Lab is down
23 * Too many duts (defined as a constant) in repair failed status
24 * Suite job issues, like bug in dynamic suite,
25 user aborted the suite, lose a drone/all devservers/rpc server,
26 0 tests ran, etc.
Fang Deng95af42f2014-09-12 14:16:11 -070027 * provision failed
28 TODO(fdeng): crbug.com/413918, reexamine treating all provision
29 failures as INFRA failures.
Fang Deng5a43be62014-05-07 17:17:04 -0700304 - SUITE_TIMEOUT: Suite timed out, some tests ran,
31 none failed by the time the suite job was aborted. This will cover,
32 but not limited to, the following cases:
33 * A devserver failure that manifests as a timeout
34 * No DUTs available midway through a suite
35 * Provision/Reset/Cleanup took longer time than expected for new image
36 * A regression in scheduler tick time.
Fang Deng6197da32014-09-25 10:18:48 -0700375- BOARD_NOT_AVAILABLE: If there is no host for the requested board/pool.
386- INVALID_OPTIONS: If options are not valid.
Chris Masone24b80f12012-02-14 14:18:01 -080039"""
40
Fang Deng5a43be62014-05-07 17:17:04 -070041
Prashanth B923ca262014-03-14 12:36:29 -070042import datetime as datetime_base
Shuqian Zhaof39bf2a2015-09-29 14:19:28 -070043import ast, getpass, json, logging, optparse, os, re, sys, time
Chris Masonecfa7efc2012-09-06 16:00:07 -070044from datetime import datetime
45
Chris Masone24b80f12012-02-14 14:18:01 -080046import common
Shuqian Zhao2fecacd2015-08-05 22:56:30 -070047from autotest_lib.client.common_lib import control_data
Fang Deng5a43be62014-05-07 17:17:04 -070048from autotest_lib.client.common_lib import error
J. Richard Barnette3cbd76b2013-11-27 12:11:25 -080049from autotest_lib.client.common_lib import global_config, enum
50from autotest_lib.client.common_lib import priorities
Dan Shidfea3682014-08-10 23:38:40 -070051from autotest_lib.client.common_lib import time_utils
Gabe Black1e1c41b2015-02-04 23:55:15 -080052from autotest_lib.client.common_lib.cros.graphite import autotest_stats
Prashanth B6285f6a2014-05-08 18:01:27 -070053from autotest_lib.client.common_lib.cros import retry
Prashanth B923ca262014-03-14 12:36:29 -070054from autotest_lib.frontend.afe.json_rpc import proxy
J. Richard Barnette3cbd76b2013-11-27 12:11:25 -080055from autotest_lib.server import utils
Dan Shi36cfd832014-10-10 13:38:51 -070056from autotest_lib.server.cros import provision
Chris Masone44e4d6c2012-08-15 14:25:53 -070057from autotest_lib.server.cros.dynamic_suite import constants
Chris Masoneb4935552012-08-14 12:05:54 -070058from autotest_lib.server.cros.dynamic_suite import frontend_wrappers
Prashanth B923ca262014-03-14 12:36:29 -070059from autotest_lib.server.cros.dynamic_suite import reporting_utils
J. Richard Barnettee7b98bb2013-08-21 16:34:16 -070060from autotest_lib.server.cros.dynamic_suite import tools
Prashanth B923ca262014-03-14 12:36:29 -070061from autotest_lib.site_utils import diagnosis_utils
MK Ryu977a9752014-10-21 11:58:09 -070062from autotest_lib.site_utils import job_overhead
63
Chris Masone24b80f12012-02-14 14:18:01 -080064
# Shared handle on the global autotest configuration; read below and by the
# classes/functions in this module.
CONFIG = global_config.global_config

# Format string read from the BUG_REPORTING config section. It is applied with
# a test name (WMATRIX_RETRY_URL % testname) to build a dashboard link.
WMATRIX_RETRY_URL = CONFIG.get_config_value('BUG_REPORTING',
                                            'wmatrix_retry_url')

# Return code that will be sent back to autotest_rpc_server.py
# NOTE(review): presumably the enum values follow declaration order and thus
# match the exit codes 0-6 listed in the module docstring -- confirm against
# enum.Enum.
RETURN_CODES = enum.Enum(
        'OK', 'ERROR', 'WARNING', 'INFRA_FAILURE', 'SUITE_TIMEOUT',
        'BOARD_NOT_AVAILABLE', 'INVALID_OPTIONS')
# The severity of return code. If multiple codes
# apply, the script should always return the severest one.
# E.g. if we have a test failure and the suite also timed out,
# we should return 'ERROR'.
# NOTE(review): BOARD_NOT_AVAILABLE and INVALID_OPTIONS have no entry here, so
# get_worse_code() raises KeyError if either is passed in -- confirm those two
# codes are only ever returned directly and never compared.
SEVERITY = {RETURN_CODES.OK: 0,
            RETURN_CODES.WARNING: 1,
            RETURN_CODES.SUITE_TIMEOUT: 2,
            RETURN_CODES.INFRA_FAILURE: 3,
            RETURN_CODES.ERROR: 4}
Fang Deng5a43be62014-05-07 17:17:04 -070083
84
def get_worse_code(code1, code2):
    """Pick whichever of two return codes is the more severe.

    Severity is looked up in the module-level SEVERITY table. When both
    codes are equally severe, code1 is returned.

    @param code1: An enum value of RETURN_CODES
    @param code2: An enum value of RETURN_CODES

    @returns: the more severe one between code1 and code2.

    """
    if SEVERITY[code1] < SEVERITY[code2]:
        return code2
    return code1
Simran Basi22aa9fe2012-12-07 16:37:09 -080095
Chris Masonedfa0beba2012-03-19 11:41:47 -070096
def parse_options():
    """Build the run_suite option parser and parse the command line.

    Several logically-boolean options (--no_wait, --file_bugs, --retry,
    --offload_failures_only) are declared as plain string options that
    must be given the literal value "True" or "False"; see the comments
    next to --no_wait below for the reason.

    @returns: A (parser, options, args) tuple, where parser is the
              optparse.OptionParser, options holds the parsed option
              values and args is the list of leftover positional
              arguments.

    """
    usage = "usage: %prog [options]"
    parser = optparse.OptionParser(usage=usage)
    parser.add_option("-b", "--board", dest="board")
    parser.add_option("-i", "--build", dest="build")
    parser.add_option("-w", "--web", dest="web", default=None,
                      help="Address of a webserver to receive suite requests.")
    parser.add_option('--firmware_rw_build', dest='firmware_rw_build',
                      default=None,
                      help='Firmware build to be installed in dut RW firmware.')
    parser.add_option('--firmware_ro_build', dest='firmware_ro_build',
                      default=None,
                      help='Firmware build to be installed in dut RO firmware.')
    parser.add_option('--test_source_build', dest='test_source_build',
                      default=None,
                      help=('Build that contains the test code, '
                            'e.g., it can be the value of `--build`, '
                            '`--firmware_rw_build` or `--firmware_ro_build` '
                            'arguments. Default is None, that is, use the test '
                            'code from `--build` (CrOS image)'))
    # This should just be a boolean flag, but the autotest "proxy" code
    # can't handle flags that don't take arguments.
    parser.add_option("-n", "--no_wait", dest="no_wait", default="False",
                      help='Must pass "True" or "False" if used.')
    # If you really want no pool, --pool="" will do it. USE WITH CARE.
    parser.add_option("-p", "--pool", dest="pool", default="suites")
    parser.add_option("-s", "--suite_name", dest="name")
    parser.add_option("-a", "--afe_timeout_mins", type="int",
                      dest="afe_timeout_mins", default=30)
    parser.add_option("-t", "--timeout_mins", type="int",
                      dest="timeout_mins", default=1440)
    parser.add_option("-x", "--max_runtime_mins", type="int",
                      dest="max_runtime_mins", default=1440)
    parser.add_option("-d", "--delay_sec", type="int",
                      dest="delay_sec", default=10)
    parser.add_option("-m", "--mock_job_id", dest="mock_job_id",
                      help="Attach to existing job id for already running "
                           "suite, and creates report.")
    # NOTE(akeshet): This looks similar to --no_wait, but behaves differently.
    # --no_wait is passed in to the suite rpc itself and affects the suite,
    # while this does not.
    parser.add_option("-c", "--create_and_return", dest="create_and_return",
                      action="store_true",
                      help="Create the suite and print the job id, then "
                           "finish immediately.")
    parser.add_option("-u", "--num", dest="num", type="int", default=None,
                      help="Run on at most NUM machines.")
    # Same boolean flag issue applies here.
    parser.add_option("-f", "--file_bugs", dest="file_bugs", default='False',
                      help='File bugs on test failures. Must pass "True" or '
                           '"False" if used.')
    parser.add_option("-l", "--bypass_labstatus", dest="bypass_labstatus",
                      action="store_true", help='Bypass lab status check.')
    # We allow either a number or a string for the priority. This way, if you
    # know what you're doing, one can specify a custom priority level between
    # other levels.
    parser.add_option("-r", "--priority", dest="priority",
                      default=priorities.Priority.DEFAULT,
                      action="store", help="Priority of suite")
    parser.add_option('--retry', dest='retry', default='False',
                      action='store', help='Enable test retry. '
                      'Must pass "True" or "False" if used.')
    # The help text previously rendered as "Maximum retriesallowed ..."
    # because the adjacent string literals were joined without a space.
    parser.add_option('--max_retries', dest='max_retries', default=None,
                      type='int', action='store', help='Maximum retries '
                      'allowed at suite level. No limit if not specified.')
    parser.add_option('--minimum_duts', dest='minimum_duts', type='int',
                      default=0, action='store',
                      help='Check that the pool has at least such many '
                           'healthy machines, otherwise suite will not run. '
                           'Default to 0.')
    parser.add_option('--suite_min_duts', dest='suite_min_duts', type='int',
                      default=0, action='store',
                      help='Preferred minimum number of machines. Scheduler '
                           'will prioritize on getting such many machines for '
                           'the suite when it is competing with another suite '
                           'that has a higher priority but already got minimum '
                           'machines it needs. Default to 0.')
    parser.add_option("--suite_args", dest="suite_args",
                      default=None, action="store",
                      help="Argument string for suite control file.")
    parser.add_option('--offload_failures_only', dest='offload_failures_only',
                      action='store', default='False',
                      help='Only enable gs_offloading for failed tests. '
                           'Successful tests will be deleted. Must pass "True"'
                           ' or "False" if used.')
    parser.add_option('--use_suite_attr', dest='use_suite_attr',
                      action='store_true', default=False,
                      help='Advanced. Run the suite based on ATTRIBUTES of '
                           'control files, rather than SUITE.')
    parser.add_option('--json_dump', dest='json_dump', action='store_true',
                      default=False,
                      help='Dump the output of run_suite to stdout.')
    options, args = parser.parse_args()
    return parser, options, args
192
193
def verify_options_and_args(options, args):
    """Verify the validity of options and args.

    Prints a human-readable explanation to stdout for every problem it
    detects. On success, options.test_source_build is filled in with
    options.build when it was not given explicitly.

    @param options: The parsed options to verify.
    @param args: The parsed args to verify.

    @returns: True if verification passes, False otherwise.

    """
    # Every message is printed with a single parenthesized argument so the
    # statement means the same thing under Python 2 and Python 3.
    if args:
        print('Unknown arguments: ' + str(args))
        return False

    if options.mock_job_id and (
            not options.build or not options.name or not options.board):
        print('When using -m, need to specify build, board and suite '
              'name which you have used for creating the original job')
        return False

    if not options.build:
        print('Need to specify which build to use')
        return False
    if not options.board:
        print('Need to specify board')
        return False
    if not options.name:
        print('Need to specify suite name')
        return False
    if options.num is not None and options.num < 1:
        print('Number of machines must be more than 0, if specified.')
        return False
    # The pseudo-boolean options must be exactly the strings "True"/"False".
    if options.no_wait != 'True' and options.no_wait != 'False':
        print('Please specify "True" or "False" for --no_wait.')
        return False
    if options.file_bugs != 'True' and options.file_bugs != 'False':
        print('Please specify "True" or "False" for --file_bugs.')
        return False
    if options.retry != 'True' and options.retry != 'False':
        print('Please specify "True" or "False" for --retry')
        return False
    if options.retry == 'False' and options.max_retries is not None:
        print('max_retries can only be used with --retry=True')
        return False
    if options.use_suite_attr and options.suite_args is not None:
        print('The new suite control file cannot parse the suite_args: %s. '
              'Please do not specify any suite_args here.' %
              options.suite_args)
        return False
    if options.no_wait == 'True' and options.retry == 'True':
        # NOTE(review): this combination is only reported, not rejected;
        # confirm whether it is meant to be a warning or should fail.
        print('Test retry is not available when using --no_wait=True')

    # Default to use the test code in CrOS build.
    if not options.test_source_build and options.build:
        options.test_source_build = options.build
    return True
247
248
def change_options_for_suite_attr(options):
    """Change options to be prepared to run the suite_attr_wrapper.

    If specify 'use_suite_attr' from the cmd line, it indicates to run the
    new style suite control file, suite_attr_wrapper. Then, change the
    options.name to 'suite_attr_wrapper', and change options.suite_args to
    the string form of a dict holding the 'attr_filter' expression needed by
    suite_attr_wrapper. Any previous value of options.suite_args is
    overwritten.

    @param options: The verified options. Modified in place.

    @returns: The changed options (the same object that was passed in).

    """
    # Treat every kind of string (including str subclasses and, on Python 2,
    # unicode) as a single suite name; an exact `type(...) is str` test would
    # iterate such values character by character.
    try:
        string_types = basestring
    except NameError:
        string_types = str

    # Convert the suite_name to attribute boolean expression.
    if isinstance(options.name, string_types):
        attr_filter_val = 'suite:%s' % options.name
    else:
        attr_filter_val = ' or '.join('suite:%s' % x for x in options.name)

    # suite_args becomes the repr of a dict of arguments for
    # suite_attr_wrapper.
    options.suite_args = str({'attr_filter': attr_filter_val})
    options.name = 'suite_attr_wrapper'

    return options
276
277
def get_pretty_status(status):
    """
    Map a raw status string onto its bracketed, printable label.

    @param status: Status to convert.

    @return: Returns pretty string.
             GOOD    -> [ PASSED ]
             TEST_NA -> [ INFO ]
             other   -> [ FAILED ]
    """
    # Anything that is not one of the two recognised statuses is a failure.
    if status not in ('GOOD', 'TEST_NA'):
        return '[ FAILED ]'
    return '[ PASSED ]' if status == 'GOOD' else '[ INFO ]'
294
Fang Dengdd20e452014-04-07 15:39:47 -0700295
def get_original_suite_name(suite_name, suite_args):
    """Recover the real suite name behind a suite_attr_wrapper run.

    @param suite_name: the name of the suite launched in afe. When it is
                       suite_attr_wrapper, the suite that is actually
                       running is specified in the suite_args.
    @param suite_args: string form of a dict whose 'attr_filter' entry
                       carries the original suite name(s) as 'suite:<name>'
                       terms. Only read when suite_name is
                       suite_attr_wrapper.

    @returns: the first suite named in the attribute filter, or suite_name
              itself when it is not a wrapper run or no 'suite:' term is
              present.

    """
    if suite_name != 'suite_attr_wrapper':
        return suite_name

    attr_filter = ast.literal_eval(suite_args).get('attr_filter', '')
    marker = 'suite:'
    # Parentheses and spaces delimit the terms of the boolean expression.
    for term in re.split('[() ]', attr_filter):
        if term.startswith(marker):
            return term[len(marker):]
    return suite_name
313
314
def GetBuildbotStepLink(anchor_text, url):
    """Wrap a link in the @@@STEP_LINK@...@@@ buildbot annotation.

    @param anchor_text    The link text.
    @param url            The url to link to.

    @return The annotation string '@@@STEP_LINK@<anchor_text>@<url>@@@'.
    """
    fields = {'text': anchor_text, 'target': url}
    return '@@@STEP_LINK@%(text)s@%(target)s@@@' % fields
322
Chris Masone24b80f12012-02-14 14:18:01 -0800323
class LogLink(object):
    """A link to be recorded in the logs for one job.

    The link targets either the job's log files or, when bug information
    was supplied at construction time, the bug filed for a failure in the
    job.

    @var anchor  The link text.
    @var url     The link url.
    @var bug_id  Id of a bug to link to, or None.
    """

    # Prefix to which a bug id is appended to form a bug url.
    _BUG_URL_PREFIX = CONFIG.get_config_value('BUG_REPORTING',
                                              'tracker_url')
    # Format string taking (server, job_string) and yielding the log url.
    _URL_PATTERN = CONFIG.get_config_value('CROS',
                                           'log_url_pattern', type=str)


    @classmethod
    def get_bug_link(cls, bug_id):
        """Build the url of the bug with the given id.

        @param bug_id: The id of the bug.
        @return: A link, eg: https://crbug.com/<bug_id>.
        """
        return '%s%s' % (cls._BUG_URL_PREFIX, bug_id)


    def __init__(self, anchor, server, job_string, bug_info=None, reason=None,
                 retry_count=0, testname=None):
        """Set up the link, deriving the log url from server and job.

        @param anchor      The link text.
        @param server      The hostname of the server this suite ran on.
        @param job_string  The job whose logs we'd like to link to.
        @param bug_info    Info about the bug, if one was filed; unpacked
                           as a (bug_id, bug_count) pair.
        @param reason      A string representing the reason of failure if any.
        @param retry_count How many times the test has been retried.
        @param testname    Optional Arg that supplies the testname.
        """
        self.anchor = anchor
        self.url = self._URL_PATTERN % (server, job_string)
        self.reason = reason
        self.retry_count = retry_count
        self.testname = testname
        # No bug filed means both bug fields stay None.
        self.bug_id, self.bug_count = bug_info if bug_info else (None, None)


    def GenerateBuildbotLink(self):
        """Render this link as a buildbot step-link annotation.

        The link points at the associated bug when there is one, and at
        the job logs otherwise. Retry count, bug report count and failure
        reason are appended to the anchor text when available.

        @return A link formatted for the buildbot log annotator.
        """
        details = []
        if self.retry_count > 0:
            details.append('retry_count: %d' % self.retry_count)

        if not self.bug_id:
            target = self.url
        else:
            target = self.get_bug_link(self.bug_id)
            if self.bug_count is None:
                details.append('unknown number of reports')
            elif self.bug_count == 1:
                details.append('new report')
            else:
                details.append('%s reports' % self.bug_count)

        if self.reason:
            details.append(self.reason.strip())

        label = self.anchor.strip()
        if details:
            label = '%(anchor)s: %(info)s' % {
                    'anchor': label, 'info': ', '.join(details)}

        return GetBuildbotStepLink(label, target)


    def GenerateTextLink(self):
        """Render the job-log link as plain text for a human reader.

        @return The anchor text immediately followed by the log url.
        """
        return '%s%s' % (self.anchor, self.url)


    def GenerateWmatrixRetryLink(self):
        """Render a link to the wmatrix retry dashboard for this test.

        @return A link formatted for the buildbot log annotator, or None
                when no testname was supplied.
        """
        if self.testname:
            return GetBuildbotStepLink(
                    'Flaky test dashboard view for test %s' % self.testname,
                    WMATRIX_RETRY_URL % self.testname)
        return None
432
433
class Timings(object):
    """Timings for important events during a suite.

    All timestamps are datetime.datetime objects.

    @var suite_job_id: the afe job id of the suite job for which
                       we are recording the timing for.
    @var download_start_time: the time the devserver starts staging
                              the build artifacts. Recorded in create_suite_job.
    @var payload_end_time: the time when the artifacts only necessary to start
                           installing images onto DUT's are staged.
                           Recorded in create_suite_job.
    @var artifact_end_time: the remaining artifacts are downloaded after we kick
                            off the reimaging job, at which point we record
                            artifact_end_time. Recorded in dynamic_suite.py.
    @var suite_start_time: the time the suite started.
    @var tests_start_time: the time the first test started running.
    @var tests_end_time: the time the last test finished running.
    """

    def __init__(self, suite_job_id):
        """Create an empty record for the given suite job.

        @param suite_job_id: the afe job id of the suite job being timed.
        """
        self.suite_job_id = suite_job_id
        # Timings related to staging artifacts on devserver.
        self.download_start_time = None
        self.payload_end_time = None
        self.artifact_end_time = None

        # The test_start_time, but taken off the view that corresponds to the
        # suite instead of an individual test.
        self.suite_start_time = None

        # Earliest and Latest tests in the set of TestViews passed to us.
        self.tests_start_time = None
        self.tests_end_time = None


    def RecordTiming(self, view):
        """Given a test report view, extract and record pertinent time info.

        get_detailed_test_views() returns a list of entries that provide
        info about the various parts of a suite run.  This method can take
        any one of these entries and look up timestamp info we might want
        and record it.

        If timestamps are unavailable, datetime.datetime.min/max will be used.

        @param view: A TestView object.
        """
        start_candidate = datetime.min
        end_candidate = datetime.max
        if view['test_started_time']:
            start_candidate = time_utils.time_string_to_datetime(
                    view['test_started_time'])
        if view['test_finished_time']:
            end_candidate = time_utils.time_string_to_datetime(
                    view['test_finished_time'])

        # The suite-prep view marks the start of the suite itself; every
        # other view widens the window covered by the individual tests.
        if view.get_testname() == TestView.SUITE_PREP:
            self.suite_start_time = start_candidate
        else:
            self._UpdateFirstTestStartTime(start_candidate)
            self._UpdateLastTestEndTime(end_candidate)
        # Artifact staging times are taken from the keyvals of views that
        # belong to the suite job itself.
        if view['afe_job_id'] == self.suite_job_id and 'job_keyvals' in view:
            keyvals = view['job_keyvals']
            self.download_start_time = time_utils.time_string_to_datetime(
                    keyvals.get(constants.DOWNLOAD_STARTED_TIME),
                    handle_type_error=True)

            self.payload_end_time = time_utils.time_string_to_datetime(
                    keyvals.get(constants.PAYLOAD_FINISHED_TIME),
                    handle_type_error=True)

            self.artifact_end_time = time_utils.time_string_to_datetime(
                    keyvals.get(constants.ARTIFACT_FINISHED_TIME),
                    handle_type_error=True)


    def _UpdateFirstTestStartTime(self, candidate):
        """Update self.tests_start_time, iff candidate is an earlier time.

        @param candidate: a datetime.datetime object.
        """
        if not self.tests_start_time or candidate < self.tests_start_time:
            self.tests_start_time = candidate


    def _UpdateLastTestEndTime(self, candidate):
        """Update self.tests_end_time, iff candidate is a later time.

        @param candidate: a datetime.datetime object.
        """
        if not self.tests_end_time or candidate > self.tests_end_time:
            self.tests_end_time = candidate


    def __str__(self):
        """Return a multi-line, human-readable summary of all timings."""
        return ('\n'
                'Suite timings:\n'
                'Downloads started at %s\n'
                'Payload downloads ended at %s\n'
                'Suite started at %s\n'
                'Artifact downloads ended (at latest) at %s\n'
                'Testing started at %s\n'
                'Testing ended at %s\n' % (self.download_start_time,
                                           self.payload_end_time,
                                           self.suite_start_time,
                                           self.artifact_end_time,
                                           self.tests_start_time,
                                           self.tests_end_time))


    def SendResultsToStatsd(self, suite, build, board):
        """
        Sends data to statsd.

        1. Makes a data_key of the form: run_suite.$board.$branch.$suite
            eg: stats/gauges/<hostname>/run_suite/<board>/<branch>/<suite>/
        2. Computes timings for several start and end event pairs.
        3. Sends all timing values to statsd.

        @param suite: scheduled suite that we want to record the results of.
        @param build: the build that this suite ran on.
                      eg: 'lumpy-release/R26-3570.0.0'
        @param board: the board that this suite ran on.
        """
        if sys.version_info < (2, 7):
            logging.error('Sending run_suite perf data to statsd requires '
                          'python 2.7 or greater.')
            return

        # Constructs the key used for logging statsd timing data.
        data_key = utils.get_data_key('run_suite', suite, build, board)

        # Since we don't want to try subtracting corrupted datetime values
        # we catch TypeErrors in time_utils.time_string_to_datetime and insert
        # None instead. This means that even if, say,
        # keyvals.get(constants.ARTIFACT_FINISHED_TIME) returns a corrupt
        # value the member artifact_end_time is set to None.
        # Each entry is (stat name, interval start, interval end); a stat is
        # only sent when both ends of its interval are known.
        intervals = (
            ('payload_download_time',
             self.download_start_time, self.payload_end_time),
            ('artifact_download_time',
             self.download_start_time, self.artifact_end_time),
            ('suite_run_time',
             self.suite_start_time, self.tests_end_time),
            ('tests_run_time',
             self.tests_start_time, self.tests_end_time),
        )
        for stat_name, start, end in intervals:
            if start and end:
                autotest_stats.Timer(data_key).send(
                        stat_name, (end - start).total_seconds())
beeps6f02d192013-03-22 13:15:49 -0700593
beeps6f02d192013-03-22 13:15:49 -0700594
# Hostname read from the SERVER section of the global config. It is the
# fallback passed as `default` by instance_for_pool() below.
_DEFAULT_AUTOTEST_INSTANCE = CONFIG.get_config_value(
        'SERVER', 'hostname', type=str)
597
598
def instance_for_pool(pool_name):
    """
    Look up which server should service a suite for the given pool.

    The POOL_INSTANCE_SHARDING config section is consulted for an entry
    named after the pool; _DEFAULT_AUTOTEST_INSTANCE is supplied as the
    default for that lookup.

    @param pool_name: The pool (without 'pool:') to schedule the suite
                      against.
    @return: The correct host that should be used to service this suite run.
    """
    fallback_host = _DEFAULT_AUTOTEST_INSTANCE
    section = 'POOL_INSTANCE_SHARDING'
    return CONFIG.get_config_value(section, pool_name, default=fallback_host)
610
611
class TestView(object):
    """Represents a tko test view and provides a set of helper functions.

    A TestView pairs one raw test view dictionary with the afe job it
    belongs to, so that callers can query the name to display, the
    abort/timeout/failure state, the retry state and the bug information
    of the view.
    """


    # Name displayed for the suite job's own SERVER_JOB view.
    SUITE_PREP = 'Suite prep'
    # Names of tests that only lab infra is concerned with.
    INFRA_TESTS = ['provision']
    # Name prefixes of views that describe a job rather than an actual test.
    _JOB_VIEW_PREFIXES = ('SERVER_JOB', 'CLIENT_JOB')


    def __init__(self, view, afe_job, suite_name, build, user,
                 solo_test_run=False):
        """Init a TestView object representing a tko test view.

        @param view: A dictionary representing a tko test view.
        @param afe_job: An instance of frontend.afe.models.Job
                        representing the job that kicked off the test.
        @param suite_name: The name of the suite
                           that the test belongs to.
        @param build: The build for which the test is run.
        @param user: The user for which the test is run.
        @param solo_test_run: This is a solo test run not part of a suite.
        """
        self.view = view
        self.afe_job = afe_job
        self.suite_name = suite_name
        self.build = build
        # A job without a parent is the suite job itself, unless the job
        # is a single test that was run outside of any suite.
        self.is_suite_view = afe_job.parent_job is None and not solo_test_run
        # This is the test name that will be shown in the output.
        # It is computed lazily and cached by get_testname().
        self.testname = None
        self.user = user

        # The case that a job was aborted before it got a chance to run
        # usually indicates suite has timed out (unless aborted by user).
        # In this case, the abort reason will be None.
        # Update the reason with proper information.
        if (self.is_relevant_suite_view() and
                self.get_testname() != self.SUITE_PREP and
                self.view['status'] == 'ABORT' and
                not self.view['reason']):
            self.view['reason'] = 'Timed out, did not run.'


    def __getitem__(self, key):
        """Overload __getitem__ so that we can still use []

        @param key: A key of the tko test view.

        @returns: The value of an attribute in the view.

        """
        return self.view[key]


    def __iter__(self):
        """Iterate over the keys of the view; also supports 'in' operator."""
        return iter(self.view)


    def _get_qualified_test_name(self):
        """Return the view's test name, qualified by job name for job views.

        SERVER_JOB and CLIENT_JOB views get the job name prepended, giving
        '<job_name>_<test_name>'. Any other view yields view['test_name']
        unchanged.

        @returns: The possibly job-qualified test name.

        """
        if self.is_test():
            return self.view['test_name']
        return '%s_%s' % (self.view['job_name'], self.view['test_name'])


    def get_testname(self):
        """Get test name that should be shown in the output.

        Formalize the test_name we got from the test view: remove the
        'build/suite' prefix if any, and prepend the experimental prefix
        for experimental tests whose names do not already start with it.
        The result is cached in self.testname.

        If one runs a test in control file via the following code,
            job.runtest('my_Test', tag='tag')
        for most of the cases, view['test_name'] would look like
        'my_Test.tag' and this method returns it as is.

        There are four special cases.
        1) A test view is for the suite job's SERVER_JOB.
           In this case, this method will return 'Suite prep'.

        2) A test view is of a child job or a solo test run not part of a
           suite, and for a SERVER_JOB or CLIENT_JOB.
           In this case, the job name is prepended to 'SERVER_JOB' or
           'CLIENT_JOB' and the build/suite prefix is removed, so the
           names returned look like:
               'experimental_Telemetry Smoothness Measurement_SERVER_JOB'
               'experimental_dummy_Pass_SERVER_JOB'
               'dummy_Fail_SERVER_JOB'

        3) A test view is of a suite job and its status is ABORT.
           In this case, the view['test_name'] is the child job's name.
           If it is an experimental test, 'experimental' will be part
           of the name. For instance,
               'lumpy-release/R35-5712.0.0/perf_v2/
                       experimental_Telemetry Smoothness Measurement'
               'lumpy-release/R35-5712.0.0/dummy/experimental_dummy_Pass'
               'lumpy-release/R35-5712.0.0/dummy/dummy_Fail'
           The above names will be converted to the following:
               'experimental_Telemetry Smoothness Measurement'
               'experimental_dummy_Pass'
               'dummy_Fail'

        4) A test view is of a suite job and its status is TEST_NA.
           In this case, the view['test_name'] is the NAME field of the
           control file. If it is an experimental test, 'experimental'
           will be part of the name. For instance,
               'experimental_Telemetry Smoothness Measurement'
               'experimental_dummy_Pass'
               'dummy_Fail'
           This method will not modify these names.

        @returns: Test name after normalization.

        """
        if self.testname is not None:
            return self.testname

        if (self.is_suite_view and
                self.view['test_name'].startswith('SERVER_JOB')):
            # Rename suite job's SERVER_JOB to 'Suite prep'.
            self.testname = self.SUITE_PREP
            return self.testname

        experimental = self.is_experimental()
        # Remove the build and suite name from testname if any.
        testname = tools.get_test_name(
                self.build, self.suite_name, self._get_qualified_test_name())
        # If an experimental test was aborted, testname
        # would include the 'experimental' prefix already.
        prefix = constants.EXPERIMENTAL_PREFIX if (
                experimental and not
                testname.startswith(constants.EXPERIMENTAL_PREFIX)) else ''
        self.testname = prefix + testname
        return self.testname


    def is_relevant_suite_view(self):
        """Checks whether this is a suite view we should care about.

        A view is relevant if it is the suite prep view, or if it belongs
        to the suite job, is not a CLIENT_JOB view and has no subdir.

        @returns: True if it is relevant. False otherwise.
        """
        return (self.get_testname() == self.SUITE_PREP or
                (self.is_suite_view and
                 not self.view['test_name'].startswith('CLIENT_JOB') and
                 not self.view['subdir']))


    def is_test(self):
        """Return whether the view is for an actual test.

        @returns True if the view is for an actual test.
                 False if the view is for SERVER_JOB or CLIENT_JOB.

        """
        return not self.view['test_name'].startswith(self._JOB_VIEW_PREFIXES)


    def is_retry(self):
        """Check whether the view is for a retry.

        @returns: True, if the job keyvals carry 'retry_original_job_id';
                  False otherwise.

        """
        return self.view['job_keyvals'].get('retry_original_job_id') is not None


    def is_experimental(self):
        """Check whether a test view is for an experimental test.

        A view is experimental if its job keyval 'experimental' is the
        string 'True', or if its test name, after removing the build/suite
        prefix, starts with 'experimental'.

        @returns: True if it is for an experimental test, False otherwise.

        """
        return (self.view['job_keyvals'].get('experimental') == 'True' or
                tools.get_test_name(self.build, self.suite_name,
                        self.view['test_name']).startswith('experimental'))


    def hit_timeout(self):
        """Check whether the corresponding job has hit its own timeout.

        Note this method should not be called for those test views
        that belongs to a suite job and are determined as irrelevant
        by is_relevant_suite_view. This is because they are associated
        to the suite job, whose job start/finished time make no sense
        to an irrelevant test view.

        @returns: True if the corresponding afe job has hit timeout.
                  False otherwise, including when the job's start or
                  finish time is not available.
        """
        if (self.is_relevant_suite_view() and
                self.get_testname() != self.SUITE_PREP):
            # Any relevant suite test view except SUITE_PREP
            # did not hit its own timeout because it was not ever run.
            return False
        started = self.view['job_started_time']
        finished = self.view['job_finished_time']
        if not started or not finished:
            # Without both timestamps the run time cannot be computed.
            return False
        start = datetime.strptime(started, time_utils.TIME_FMT)
        end = datetime.strptime(finished, time_utils.TIME_FMT)
        return ((end - start).total_seconds() / 60.0
                > self.afe_job.max_runtime_mins)


    def is_aborted(self):
        """Check if the view was aborted.

        For suite prep and child job test views, we check job keyval
        'aborted_by' and test status.

        For relevant suite job test views, we only check test status
        because the suite job keyval won't make sense to individual
        test views.

        @returns: True if the test was aborted, False otherwise.

        """
        if (self.is_relevant_suite_view() and
                self.get_testname() != self.SUITE_PREP):
            return self.view['status'] == 'ABORT'
        return (bool(self.view['job_keyvals'].get('aborted_by')) and
                self.view['status'] in ['ABORT', 'RUNNING'])


    def is_in_fail_status(self):
        """Check if the given test's status corresponds to a failure.

        @returns: True if the test's status is FAIL, ERROR or ABORT.
                  False otherwise.

        """
        # All the statuses tests can have when they fail.
        return self.view['status'] in ['FAIL', 'ERROR', 'ABORT']


    def is_infra_test(self):
        """Check whether this is a test that only lab infra is concerned.

        @returns: True if the displayed test name is listed in INFRA_TESTS,
                  False otherwise.

        """
        return self.get_testname() in self.INFRA_TESTS


    def get_buildbot_link_reason(self):
        """Generate the buildbot link reason for the test.

        @returns: '<status>: <reason>' if the view has a reason,
                  otherwise just the status.

        """
        return ('%s: %s' % (self.view['status'], self.view['reason'])
                if self.view['reason'] else self.view['status'])


    def get_job_id_owner_str(self):
        """Generate the job_id_owner string for a test.

        @returns: A string which looks like 135036-username

        """
        return '%s-%s' % (self.view['afe_job_id'], self.user)


    def get_bug_info(self, suite_job_keyvals):
        """Get the bug info from suite_job_keyvals.

        If a bug has been filed for the test, its bug info (bug id and counts)
        will be stored in the suite job's keyvals. This method attempts to
        retrieve bug info of the test from |suite_job_keyvals|. It will return
        None if no bug info is found. No need to check bug info if the view is
        SUITE_PREP.

        @param suite_job_keyvals: The job keyval dictionary of the suite job.
                All the bug info about child jobs are stored in
                suite job's keyvals.

        @returns: None if there is no bug info, or a pair with the
                  id of the bug, and the count of the number of
                  times the bug has been seen.

        """
        if self.get_testname() == self.SUITE_PREP:
            return None
        # The lookup uses the job-qualified name, i.e. the name before the
        # build/suite prefix is removed and the experimental prefix added.
        return tools.get_test_failure_bug_info(
                suite_job_keyvals, self.view['afe_job_id'],
                self._get_qualified_test_name())


    def should_display_buildbot_link(self):
        """Check whether a buildbot link should show for this view.

        For suite prep view, show buildbot link if it fails.
        For normal test view,
            show buildbot link if it is a retry
            show buildbot link if it hits its own timeout.
            show buildbot link if it fails. This doesn't
            include the case where it was aborted but has
            not hit its own timeout (most likely it was aborted because
            suite has timed out).

        @returns: True if we should show the buildbot link.
                  False otherwise.
        """
        is_bad_status = self.view['status'] not in ('GOOD', 'TEST_NA')
        if self.get_testname() == self.SUITE_PREP:
            return is_bad_status
        if self.is_retry():
            return True
        if is_bad_status:
            return not self.is_aborted() or self.hit_timeout()
        # A passing (or not applicable) test that is not a retry.
        return False


    def get_control_file_attributes(self):
        """Get the attributes from the control file of the test.

        @returns: A list of test attributes parsed from the afe job's
                  control file, or None if the job has no control file.
        """
        control_file = self.afe_job.control_file
        if not control_file:
            return None
        cd = control_data.parse_control_string(control_file)
        return list(cd.attributes)
951
952
Fang Dengdd20e452014-04-07 15:39:47 -0700953class ResultCollector(object):
Simran Basi17ca77c2015-10-14 19:05:00 -0700954 """Collect test results of a suite or a single test run.
Fang Dengdd20e452014-04-07 15:39:47 -0700955
956 Once a suite job has finished, use this class to collect test results.
957 `run` is the core method that is to be called first. Then the caller
958 could retrieve information like return code, return message, is_aborted,
959 and timings by accessing the collector's public attributes. And output
960 the test results and links by calling the 'output_*' methods.
961
    Here is an overview of what the `run` method does.
963
964 1) Collect the suite job's results from tko_test_view_2.
965 For the suite job, we only pull test views without a 'subdir'.
966 A NULL subdir indicates that the test was _not_ executed. This could be
967 that no child job was scheduled for this test or the child job got
968 aborted before starts running.
969 (Note 'SERVER_JOB'/'CLIENT_JOB' are handled specially)
970
971 2) Collect the child jobs' results from tko_test_view_2.
972 For child jobs, we pull all the test views associated with them.
    (Note 'SERVER_JOB'/'CLIENT_JOB' are handled specially)
974
Fang Dengaeab6172014-05-07 17:17:04 -0700975 3) Generate web and buildbot links.
Fang Dengdd20e452014-04-07 15:39:47 -0700976 4) Compute timings of the suite run.
977 5) Compute the return code based on test results.
978
979 @var _instance_server: The hostname of the server that is used
980 to service the suite.
981 @var _afe: The afe rpc client.
982 @var _tko: The tko rpc client.
983 @var _build: The build for which the suite is run,
984 e.g. 'lumpy-release/R35-5712.0.0'
MK Ryu977a9752014-10-21 11:58:09 -0700985 @var _board: The target board for which the suite is run,
986 e.g., 'lumpy', 'link'.
Fang Dengdd20e452014-04-07 15:39:47 -0700987 @var _suite_name: The suite name, e.g. 'bvt', 'dummy'.
988 @var _suite_job_id: The job id of the suite for which we are going to
989 collect results.
Shuqian Zhaof39bf2a2015-09-29 14:19:28 -0700990 @var _original_suite_name: The suite name we record timing would be
991 different from _suite_name when running
992 suite_attr_wrapper.
Fang Dengaeab6172014-05-07 17:17:04 -0700993 @var _suite_views: A list of TestView objects, representing relevant
994 test views of the suite job.
995 @var _child_views: A list of TestView objects, representing test views
996 of the child jobs.
997 @var _test_views: A list of TestView objects, representing all test views
998 from _suite_views and _child_views.
Fang Dengdd20e452014-04-07 15:39:47 -0700999 @var _web_links: A list of web links pointing to the results of jobs.
1000 @var _buildbot_links: A list of buildbot links for non-passing tests.
Fang Dengaeab6172014-05-07 17:17:04 -07001001 @var _max_testname_width: Max width of all test names.
Simran Basi17ca77c2015-10-14 19:05:00 -07001002 @var _solo_test_run: True if this is a single test run.
Fang Dengdd20e452014-04-07 15:39:47 -07001003 @var return_code: The exit code that should be returned by run_suite.
1004 @var return_message: Any message that should be displayed to explain
1005 the return code.
1006 @var is_aborted: Whether the suite was aborted or not.
1007 True, False or None (aborting status is unknown yet)
1008 @var timings: A Timing object that records the suite's timings.
1009
1010 """
1011
1012
MK Ryu977a9752014-10-21 11:58:09 -07001013 def __init__(self, instance_server, afe, tko, build, board,
Simran Basi01984f52015-10-12 15:36:45 -07001014 suite_name, suite_job_id, original_suite_name=None,
Simran Basi17ca77c2015-10-14 19:05:00 -07001015 user=None, solo_test_run=False):
Fang Dengdd20e452014-04-07 15:39:47 -07001016 self._instance_server = instance_server
1017 self._afe = afe
1018 self._tko = tko
1019 self._build = build
MK Ryu977a9752014-10-21 11:58:09 -07001020 self._board = board
Fang Dengdd20e452014-04-07 15:39:47 -07001021 self._suite_name = suite_name
1022 self._suite_job_id = suite_job_id
Shuqian Zhaof39bf2a2015-09-29 14:19:28 -07001023 self._original_suite_name = original_suite_name or suite_name
Fang Deng0454e632014-04-07 15:39:47 -07001024 self._suite_views = []
1025 self._child_views = []
Fang Dengdd20e452014-04-07 15:39:47 -07001026 self._test_views = []
Fang Dengaeab6172014-05-07 17:17:04 -07001027 self._retry_counts = {}
Fang Dengdd20e452014-04-07 15:39:47 -07001028 self._web_links = []
1029 self._buildbot_links = []
Fang Deng0454e632014-04-07 15:39:47 -07001030 self._max_testname_width = 0
MK Ryu977a9752014-10-21 11:58:09 -07001031 self._num_child_jobs = 0
Fang Dengdd20e452014-04-07 15:39:47 -07001032 self.return_code = None
Fang Deng0454e632014-04-07 15:39:47 -07001033 self.return_message = ''
Fang Dengdd20e452014-04-07 15:39:47 -07001034 self.is_aborted = None
1035 self.timings = None
Simran Basi01984f52015-10-12 15:36:45 -07001036 self._user = user or getpass.getuser()
Simran Basi17ca77c2015-10-14 19:05:00 -07001037 self._solo_test_run = solo_test_run
Fang Dengdd20e452014-04-07 15:39:47 -07001038
1039
Fang Dengdd20e452014-04-07 15:39:47 -07001040 def _fetch_relevant_test_views_of_suite(self):
1041 """Fetch relevant test views of the suite job.
1042
1043 For the suite job, there will be a test view for SERVER_JOB, and views
1044 for results of its child jobs. For example, assume we've ceated
1045 a suite job (afe_job_id: 40) that runs dummy_Pass, dummy_Fail,
1046 dummy_Pass.bluetooth. Assume dummy_Pass was aborted before running while
1047 dummy_Path.bluetooth got TEST_NA as no duts have bluetooth.
1048 So the suite job's test views would look like
1049 _____________________________________________________________________
1050 test_idx| job_idx|test_name |subdir |afe_job_id|status
1051 10 | 1000 |SERVER_JOB |---- |40 |GOOD
1052 11 | 1000 |dummy_Pass |NULL |40 |ABORT
1053 12 | 1000 |dummy_Fail.Fail |41-onwer/...|40 |FAIL
1054 13 | 1000 |dummy_Fail.Error |42-owner/...|40 |ERROR
1055 14 | 1000 |dummy_Pass.bluetooth|NULL |40 |TEST_NA
1056
1057 For a suite job, we only care about
1058 a) The test view for the suite job's SERVER_JOB
1059 b) The test views for real tests without a subdir. A NULL subdir
1060 indicates that a test didn't get executed.
1061 So, for the above example, we only keep test views whose test_idxs
1062 are 10, 11, 14.
1063
Fang Dengaeab6172014-05-07 17:17:04 -07001064 @returns: A list of TestView objects, representing relevant
1065 test views of the suite job.
Fang Dengdd20e452014-04-07 15:39:47 -07001066
1067 """
Fang Dengf8503532014-06-12 18:21:55 -07001068 suite_job = self._afe.get_jobs(id=self._suite_job_id)[0]
Fang Deng0454e632014-04-07 15:39:47 -07001069 views = self._tko.run(call='get_detailed_test_views',
1070 afe_job_id=self._suite_job_id)
Fang Dengdd20e452014-04-07 15:39:47 -07001071 relevant_views = []
1072 for v in views:
Simran Basi17ca77c2015-10-14 19:05:00 -07001073 v = TestView(v, suite_job, self._suite_name, self._build, self._user,
1074 solo_test_run=self._solo_test_run)
Fang Dengaeab6172014-05-07 17:17:04 -07001075 if v.is_relevant_suite_view():
Fang Dengdd20e452014-04-07 15:39:47 -07001076 relevant_views.append(v)
Fang Dengdd20e452014-04-07 15:39:47 -07001077 return relevant_views
1078
1079
Fang Dengaeab6172014-05-07 17:17:04 -07001080 def _compute_retry_count(self, view):
1081 """Return how many times the test has been retried.
1082
1083 @param view: A TestView instance.
1084 @returns: An int value indicating the retry count.
1085
1086 """
1087 old_job = view['job_keyvals'].get('retry_original_job_id')
1088 count = 0
1089 while old_job:
1090 count += 1
1091 views = self._tko.run(
1092 call='get_detailed_test_views', afe_job_id=old_job)
1093 old_job = (views[0]['job_keyvals'].get('retry_original_job_id')
1094 if views else None)
1095 return count
1096
1097
Simran Basi17ca77c2015-10-14 19:05:00 -07001098 def _fetch_test_views_of_child_jobs(self, jobs=None):
Fang Dengdd20e452014-04-07 15:39:47 -07001099 """Fetch test views of child jobs.
1100
Fang Dengaeab6172014-05-07 17:17:04 -07001101 @returns: A tuple (child_views, retry_counts)
1102 child_views is list of TestView objects, representing
1103 all valid views. retry_counts is a dictionary that maps
1104 test_idx to retry counts. It only stores retry
1105 counts that are greater than 0.
Fang Deng0454e632014-04-07 15:39:47 -07001106
Fang Dengdd20e452014-04-07 15:39:47 -07001107 """
Fang Dengdd20e452014-04-07 15:39:47 -07001108 child_views = []
Fang Dengaeab6172014-05-07 17:17:04 -07001109 retry_counts = {}
Simran Basi17ca77c2015-10-14 19:05:00 -07001110 child_jobs = jobs or self._afe.get_jobs(parent_job_id=self._suite_job_id)
MK Ryu977a9752014-10-21 11:58:09 -07001111 if child_jobs:
1112 self._num_child_jobs = len(child_jobs)
Fang Dengf8503532014-06-12 18:21:55 -07001113 for job in child_jobs:
Simran Basi01984f52015-10-12 15:36:45 -07001114 views = [TestView(v, job, self._suite_name, self._build, self._user)
Fang Dengaeab6172014-05-07 17:17:04 -07001115 for v in self._tko.run(
Fang Dengf8503532014-06-12 18:21:55 -07001116 call='get_detailed_test_views', afe_job_id=job.id,
Fang Dengaeab6172014-05-07 17:17:04 -07001117 invalid=0)]
Fang Dengdd20e452014-04-07 15:39:47 -07001118 contains_test_failure = any(
Fang Dengaeab6172014-05-07 17:17:04 -07001119 v.is_test() and v['status'] != 'GOOD' for v in views)
Fang Dengdd20e452014-04-07 15:39:47 -07001120 for v in views:
Fang Dengaeab6172014-05-07 17:17:04 -07001121 if (v.is_test() or
1122 v['status'] != 'GOOD' and not contains_test_failure):
1123 # For normal test view, just keep it.
1124 # For SERVER_JOB or CLIENT_JOB, only keep it
1125 # if it fails and no other test failure.
Fang Dengdd20e452014-04-07 15:39:47 -07001126 child_views.append(v)
Fang Dengaeab6172014-05-07 17:17:04 -07001127 retry_count = self._compute_retry_count(v)
1128 if retry_count > 0:
1129 retry_counts[v['test_idx']] = retry_count
1130 return child_views, retry_counts
Fang Dengdd20e452014-04-07 15:39:47 -07001131
1132
1133 def _generate_web_and_buildbot_links(self):
1134 """Generate web links and buildbot links."""
1135 # TODO(fdeng): If a job was aborted before it reaches Running
1136 # state, we read the test view from the suite job
1137 # and thus this method generates a link pointing to the
1138 # suite job's page for the aborted job. Need a fix.
1139 self._web_links = []
1140 self._buildbot_links = []
1141 # Bug info are stored in the suite job's keyvals.
Simran Basi17ca77c2015-10-14 19:05:00 -07001142 if self._solo_test_run:
1143 suite_job_keyvals = {}
1144 else:
1145 suite_job_keyvals = self._suite_views[0]['job_keyvals']
Fang Dengdd20e452014-04-07 15:39:47 -07001146 for v in self._test_views:
Fang Dengaeab6172014-05-07 17:17:04 -07001147 retry_count = self._retry_counts.get(v['test_idx'], 0)
1148 bug_info = v.get_bug_info(suite_job_keyvals)
1149 job_id_owner = v.get_job_id_owner_str()
Fang Dengdd20e452014-04-07 15:39:47 -07001150 link = LogLink(
Fang Dengaeab6172014-05-07 17:17:04 -07001151 anchor=v.get_testname().ljust(
Fang Dengdd20e452014-04-07 15:39:47 -07001152 self._max_testname_width),
1153 server=self._instance_server,
1154 job_string=job_id_owner,
Simran Basi7203d4e2015-02-03 15:50:18 -08001155 bug_info=bug_info, retry_count=retry_count,
1156 testname=v.get_testname())
Fang Dengdd20e452014-04-07 15:39:47 -07001157 self._web_links.append(link)
1158
Fang Dengaeab6172014-05-07 17:17:04 -07001159 if v.should_display_buildbot_link():
1160 link.reason = v.get_buildbot_link_reason()
Fang Dengdd20e452014-04-07 15:39:47 -07001161 self._buildbot_links.append(link)
1162
1163
1164 def _record_timings(self):
1165 """Record suite timings."""
1166 self.timings = Timings(self._suite_job_id)
1167 for v in self._test_views:
1168 self.timings.RecordTiming(v)
1169
1170
Fang Dengaeab6172014-05-07 17:17:04 -07001171 def _get_return_msg(self, code, tests_passed_after_retry):
1172 """Return the proper message for a given return code.
1173
1174 @param code: An enum value of RETURN_CODES
1175 @param test_passed_after_retry: True/False, indicating
1176 whether there are test(s) that have passed after retry.
1177
1178 @returns: A string, representing the message.
1179
1180 """
1181 if code == RETURN_CODES.INFRA_FAILURE:
Fang Deng95af42f2014-09-12 14:16:11 -07001182 return 'Suite job failed or provisioning failed.'
Fang Dengaeab6172014-05-07 17:17:04 -07001183 elif code == RETURN_CODES.SUITE_TIMEOUT:
1184 return ('Some test(s) was aborted before running,'
1185 ' suite must have timed out.')
1186 elif code == RETURN_CODES.WARNING:
1187 if tests_passed_after_retry:
1188 return 'Some test(s) passed after retry.'
1189 else:
1190 return 'Some test(s) raised a warning.'
1191 elif code == RETURN_CODES.ERROR:
1192 return 'Some test(s) failed.'
1193 else:
1194 return ''
1195
1196
    def _compute_return_code(self):
        """Compute the exit code based on test results.

        Each non-experimental view in self._test_views is mapped to one
        RETURN_CODES value, which is combined with the code computed so far
        through get_worse_code(). The outcome is stored in self.return_code
        and the matching explanation in self.return_message.
        """
        code = RETURN_CODES.OK
        tests_passed_after_retry = False

        for v in self._test_views:
            # The order of checking each case is important.
            if v.is_experimental():
                # Experimental tests never affect the return code.
                continue
            if v.get_testname() == TestView.SUITE_PREP:
                # The view of the suite job itself.
                if v.is_aborted() and v.hit_timeout():
                    current_code = RETURN_CODES.SUITE_TIMEOUT
                elif v.is_in_fail_status():
                    current_code = RETURN_CODES.INFRA_FAILURE
                elif v['status'] == 'WARN':
                    current_code = RETURN_CODES.WARNING
                else:
                    current_code = RETURN_CODES.OK
            else:
                if v.is_aborted() and v.is_relevant_suite_view():
                    # The test was aborted before it started.
                    # This guarantees that the suite has timed out.
                    current_code = RETURN_CODES.SUITE_TIMEOUT
                elif v.is_aborted() and not v.hit_timeout():
                    # The test was aborted, but
                    # not due to a timeout. This is most likely
                    # because the suite has timed out, but may
                    # also because it was aborted by the user.
                    # Since suite timing out is determined by checking
                    # the suite prep view, we simply ignore this view here.
                    current_code = RETURN_CODES.OK
                elif v.is_in_fail_status():
                    # The test job failed.
                    if v.is_infra_test():
                        current_code = RETURN_CODES.INFRA_FAILURE
                    else:
                        current_code = RETURN_CODES.ERROR
                elif v['status'] == 'WARN':
                    # The test/suite job raised a warning.
                    current_code = RETURN_CODES.WARNING
                elif v.is_retry():
                    # The test is a passing retry.
                    current_code = RETURN_CODES.WARNING
                    tests_passed_after_retry = True
                else:
                    current_code = RETURN_CODES.OK
            code = get_worse_code(code, current_code)

        self.return_code = code
        self.return_message = self._get_return_msg(
                code, tests_passed_after_retry)
Fang Dengdd20e452014-04-07 15:39:47 -07001248
1249
1250 def output_results(self):
1251 """Output test results, timings and web links."""
1252 # Output test results
1253 for v in self._test_views:
Fang Dengaeab6172014-05-07 17:17:04 -07001254 display_name = v.get_testname().ljust(self._max_testname_width)
Fang Dengdd20e452014-04-07 15:39:47 -07001255 logging.info('%s%s', display_name,
1256 get_pretty_status(v['status']))
1257 if v['status'] != 'GOOD':
Fang Dengaeab6172014-05-07 17:17:04 -07001258 logging.info('%s %s: %s', display_name, v['status'],
Fang Dengdd20e452014-04-07 15:39:47 -07001259 v['reason'])
Fang Dengaeab6172014-05-07 17:17:04 -07001260 if v.is_retry():
1261 retry_count = self._retry_counts.get(v['test_idx'], 0)
1262 logging.info('%s retry_count: %s',
1263 display_name, retry_count)
Fang Dengdd20e452014-04-07 15:39:47 -07001264 # Output suite timings
1265 logging.info(self.timings)
1266 # Output links to test logs
1267 logging.info('\nLinks to test logs:')
1268 for link in self._web_links:
1269 logging.info(link.GenerateTextLink())
Fang Deng5a43be62014-05-07 17:17:04 -07001270 logging.info('\n')
Fang Dengdd20e452014-04-07 15:39:47 -07001271
1272
def get_results_dict(self):
    """Assemble test results, suite timings and log links into one dict.

    @returns: A dict of results in the format like:
              {
                  'tests': {
                      'test_1': {'status': 'PASSED', 'attributes': [1,2], ...}
                      'test_2': {'status': 'FAILED', 'attributes': [1],...}
                  }
                  'suite_timings': {
                      'download_start': '1998-07-17 00:00:00',
                      'payload_download_end': '1998-07-17 00:00:05',
                      ...
                  }
                  'suite_job_id': <id of the suite job>
              }
    """
    per_test = {}
    for view in self._test_views:
        entry = per_test.setdefault(view.get_testname(), {})
        entry['status'] = view['status']
        entry['attributes'] = view.get_control_file_attributes() or list()
        entry['reason'] = view['reason']
        entry['retry_count'] = self._retry_counts.get(view['test_idx'], 0)

    # Attach log links to the tests they belong to; a link is matched to a
    # test by its stripped anchor text. Links without a matching test are
    # ignored.
    for web_link in self._web_links:
        entry = per_test.get(web_link.anchor.strip())
        if not entry:
            continue
        entry['link_to_logs'] = web_link.url
        # Only links that are also buildbot links get a wmatrix link.
        if web_link in self._buildbot_links and web_link.testname:
            entry['wmatrix_link'] = WMATRIX_RETRY_URL % web_link.testname
        if web_link.bug_id:
            entry['bug_url'] = web_link.get_bug_link(web_link.bug_id)

    # Every suite timing is reported as the str() of the recorded value.
    timings = self.timings
    timing_fields = (
            ('download_start', 'download_start_time'),
            ('payload_download_end', 'payload_end_time'),
            ('suite_start', 'suite_start_time'),
            ('artifact_download_end', 'artifact_end_time'),
            ('tests_start', 'tests_start_time'),
            ('tests_end', 'tests_end_time'),
    )
    suite_timings = dict((key, str(getattr(timings, attr)))
                         for key, attr in timing_fields)

    return {
            'tests': per_test,
            'suite_timings': suite_timings,
            'suite_job_id': self._suite_job_id,
    }
1329
1330
def output_buildbot_links(self):
    """Log every buildbot link, plus its wmatrix retry link when present."""
    for buildbot_link in self._buildbot_links:
        logging.info(buildbot_link.GenerateBuildbotLink())
        retry_link = buildbot_link.GenerateWmatrixRetryLink()
        if not retry_link:
            # Nothing to log for links without a wmatrix retry link.
            continue
        logging.info(retry_link)
Fang Dengdd20e452014-04-07 15:39:47 -07001338
1339
def run(self):
    """Collect test results.

    This method goes through the following steps:
        Fetch relevant test views of the suite job.
        Fetch test views of child jobs
        Check whether the suite was aborted.
        Generate links.
        Calculate suite timings.
        Compute return code based on the test result.

    If no test view is found, return_code and return_message are set to
    report an infra failure and the remaining steps are skipped.

    """
    if self._solo_test_run:
        # Only the job identified by the suite job id is examined.
        self._test_views, self._retry_counts = (
                self._fetch_test_views_of_child_jobs(
                        jobs=self._afe.get_jobs(id=self._suite_job_id)))
        # This branch used to store the counts only as |retry_count|, a
        # name get_results_dict() never reads. Keep it as an alias in case
        # anything else still refers to it.
        self.retry_count = self._retry_counts
    else:
        self._suite_views = self._fetch_relevant_test_views_of_suite()
        self._child_views, self._retry_counts = (
                self._fetch_test_views_of_child_jobs())
        self._test_views = self._suite_views + self._child_views
    # For hostless job in Starting status, there is no test view associated.
    # This can happen when a suite job in Starting status is aborted. When
    # the scheduler hits some limit, e.g., max_hostless_jobs_per_drone,
    # max_jobs_started_per_cycle, a suite job can stay in Starting status.
    if not self._test_views:
        self.return_code = RETURN_CODES.INFRA_FAILURE
        self.return_message = 'No test view was found.'
        return
    # NOTE(review): the solo-test branch above does not assign
    # |_suite_views|, so this relies on it being initialized elsewhere --
    # confirm against __init__.
    self.is_aborted = any([view['job_keyvals'].get('aborted_by')
                           for view in self._suite_views])
    # Width of the longest test name, padded by 3.
    self._max_testname_width = max(
            [len(v.get_testname()) for v in self._test_views]) + 3
    self._generate_web_and_buildbot_links()
    self._record_timings()
    self._compute_return_code()
1376
1377
def gather_timing_stats(self):
    """Report suite timings to statsd and the suite runtime to metadata db."""
    # Send timings to statsd.
    self.timings.SendResultsToStatsd(
            self._original_suite_name, self._build, self._board)

    # Record suite runtime in metadata db. Some failure modes can leave
    # either timestamp unassigned; -1 is reported as a sentinel then.
    if (self.timings.tests_end_time is None or
        self.timings.suite_start_time is None):
        runtime_in_secs = -1
    else:
        elapsed = (self.timings.tests_end_time -
                   self.timings.suite_start_time)
        runtime_in_secs = elapsed.total_seconds()

    job_overhead.record_suite_runtime(
            self._suite_job_id, self._suite_name, self._board, self._build,
            self._num_child_jobs, runtime_in_secs)
1395
1396
@retry.retry(error.StageControlFileFailure, timeout_min=10)
def create_suite(afe, options):
    """Create a suite with retries.

    Translates the parsed command line options into the arguments of the
    'create_suite_job' rpc and submits it through |afe|.

    @param afe: The afe object to insert the new suite job into.
    @param options: The options to use in creating the suite.

    @return: The afe_job_id of the new suite job.
    """
    # Map each provision prefix to the build requested for it, if any.
    builds = {}
    cros_build = options.build
    if cros_build:
        builds[provision.CROS_VERSION_PREFIX] = cros_build
    fw_rw_build = options.firmware_rw_build
    if fw_rw_build:
        builds[provision.FW_RW_VERSION_PREFIX] = fw_rw_build
    fw_ro_build = options.firmware_ro_build
    if fw_ro_build:
        builds[provision.FW_RO_VERSION_PREFIX] = fw_ro_build

    # These options arrive as the strings 'True' / 'False'.
    should_wait = options.no_wait == 'False'
    should_file_bugs = options.file_bugs == 'True'
    job_retry = options.retry == 'True'
    failures_only = options.offload_failures_only == 'True'

    # A priority is either a plain integer or the name of a known level.
    requested_priority = options.priority
    try:
        priority = int(requested_priority)
    except ValueError:
        try:
            priority = priorities.Priority.get_value(requested_priority)
        except AttributeError:
            print('Unknown priority level %s. Try one of %s.' % (
                    requested_priority,
                    ', '.join(priorities.Priority.names)))
            raise

    logging.info('%s Submitted create_suite_job rpc',
                 diagnosis_utils.JobTimer.format_time(datetime.now()))
    rpc_args = dict(
            name=options.name,
            board=options.board,
            build=options.build,
            builds=builds,
            test_source_build=options.test_source_build,
            check_hosts=should_wait,
            pool=options.pool,
            num=options.num,
            file_bugs=should_file_bugs,
            priority=priority,
            suite_args=options.suite_args,
            wait_for_results=should_wait,
            timeout_mins=options.timeout_mins,
            max_runtime_mins=options.max_runtime_mins,
            job_retry=job_retry,
            max_retries=options.max_retries,
            suite_min_duts=options.suite_min_duts,
            offload_failures_only=failures_only)
    return afe.run('create_suite_job', **rpc_args)
Prashanth B6285f6a2014-05-08 18:01:27 -07001441
1442
def main_without_exception_handling(options):
    """
    run_suite script without exception handling.

    Creates the suite job described by |options| (or attaches to the existing
    job named by options.mock_job_id), optionally waits for it to finish, and
    collects the results.

    @param options: The parsed options.

    @returns: A tuple contains the return_code of run_suite and the dictionary
              of the output.
    @raises utils.TestLabException: If options.mock_job_id names a job that
            cannot be retrieved.
    @raises diagnosis_utils.NotEnoughDutsError: Re-raised, after logging a
            link to the pool health bug, when raised while scheduling the
            suite.

    """
    # If asked to use the new style suite control file, convert the args
    if options.use_suite_attr:
        options = change_options_for_suite_attr(options)

    log_name = 'run_suite-default.log'
    if options.build:
        # convert build name from containing / to containing only _
        log_name = 'run_suite-%s.log' % options.build.replace('/', '_')
        log_dir = os.path.join(common.autotest_dir, 'logs')
        if os.path.exists(log_dir):
            log_name = os.path.join(log_dir, log_name)

    utils.setup_logging(logfile=log_name)

    if not options.bypass_labstatus:
        utils.check_lab_status(options.build)
    instance_server = (options.web if options.web else
                       instance_for_pool(options.pool))
    afe = frontend_wrappers.RetryingAFE(server=instance_server,
                                        timeout_min=options.afe_timeout_mins,
                                        delay_sec=options.delay_sec)
    logging.info('Autotest instance: %s', instance_server)

    rpc_helper = diagnosis_utils.RPCHelper(afe)
    is_real_time = True
    if options.mock_job_id:
        job_id = int(options.mock_job_id)
        # A mock job that has already finished is not being watched in real
        # time; otherwise fall back to looking it up regardless of state.
        existing_job = afe.get_jobs(id=job_id, finished=True)
        if existing_job:
            is_real_time = False
        else:
            existing_job = afe.get_jobs(id=job_id)
        if existing_job:
            job_created_on = time_utils.date_string_to_epoch_time(
                    existing_job[0].created_on)
        else:
            raise utils.TestLabException('Failed to retrieve job: %d' % job_id)
    else:
        try:
            rpc_helper.check_dut_availability(options.board, options.pool,
                                              options.minimum_duts)
            job_id = create_suite(afe, options)
            job_created_on = time.time()
        except diagnosis_utils.NotEnoughDutsError:
            logging.info(GetBuildbotStepLink(
                    'Pool Health Bug', LogLink.get_bug_link(rpc_helper.bug)))
            raise
        except (error.CrosDynamicSuiteException,
                error.RPCException, proxy.JSONRPCException) as e:
            logging.warning('Error Message: %s', e)
            # Stringify the exception: main() passes this dict to
            # json.dumps(), which cannot serialize an exception object.
            return (RETURN_CODES.INFRA_FAILURE, {'return_message': str(e)})
        except AttributeError:
            return (RETURN_CODES.INVALID_OPTIONS, {})

    job_timer = diagnosis_utils.JobTimer(
            job_created_on, float(options.timeout_mins))
    job_url = reporting_utils.link_job(job_id,
                                       instance_server=instance_server)
    logging.info('%s Created suite job: %s',
                 job_timer.format_time(job_timer.job_created_time),
                 job_url)
    # TODO(akeshet): Move this link-printing to chromite.
    logging.info(GetBuildbotStepLink('Suite created', job_url))

    if options.create_and_return:
        msg = '--create_and_return was specified, terminating now.'
        logging.info(msg)
        return (RETURN_CODES.OK, {'return_message': msg})

    TKO = frontend_wrappers.RetryingTKO(server=instance_server,
                                        timeout_min=options.afe_timeout_mins,
                                        delay_sec=options.delay_sec)
    code = RETURN_CODES.OK
    wait = options.no_wait == 'False'
    output_dict = {}
    if wait:
        while not afe.get_jobs(id=job_id, finished=True):
            # Note that this call logs output, preventing buildbot's
            # 9000 second silent timeout from kicking in. Let there be no
            # doubt, this is a hack. The timeout is from upstream buildbot and
            # this is the easiest work around.
            if job_timer.first_past_halftime():
                rpc_helper.diagnose_job(job_id, instance_server)
            if job_timer.debug_output_timer.poll():
                logging.info('The suite job has another %s till timeout.',
                             job_timer.timeout_hours - job_timer.elapsed_time())
            time.sleep(10)
        # For most cases, ResultCollector should be able to determine whether
        # a suite has timed out by checking information in the test view.
        # However, occasionally tko parser may fail on parsing the
        # job_finished time from the job's keyval file. So we add another
        # layer of timeout check in run_suite. We do the check right after
        # the suite finishes to make it as accurate as possible.
        # There is a minor race condition here where we might have aborted
        # for some reason other than a timeout, and the job_timer thinks
        # it's a timeout because of the jitter in waiting for results.
        # The consequence would be that run_suite exits with code
        # SUITE_TIMEOUT while it should have returned INFRA_FAILURE
        # instead, which should happen very rarely.
        # Note the timeout makes no sense when using the -m option.
        is_suite_timeout = job_timer.is_suite_timeout()

        # Extract the original suite name to record timing.
        original_suite_name = get_original_suite_name(options.name,
                                                      options.suite_args)
        # Start collecting test results.
        collector = ResultCollector(instance_server=instance_server,
                                    afe=afe, tko=TKO, build=options.build,
                                    board=options.board,
                                    suite_name=options.name,
                                    suite_job_id=job_id,
                                    original_suite_name=original_suite_name)
        collector.run()
        # Dump test outputs into json.
        output_dict = collector.get_results_dict()
        output_dict['autotest_instance'] = instance_server
        if not options.json_dump:
            collector.output_results()
        code = collector.return_code
        return_message = collector.return_message
        if is_real_time:
            # Do not record stats if the suite was aborted (either by a user
            # or through the golo rpc).
            # Also do not record stats if is_aborted is None, indicating
            # aborting status is unknown yet.
            if collector.is_aborted is False:
                collector.gather_timing_stats()

            if collector.is_aborted is True and is_suite_timeout:
                # There are two possible cases when a suite times out.
                # 1. the suite job was aborted due to timing out
                # 2. the suite job succeeded, but some child jobs
                #    were already aborted before the suite job exited.
                # The case 2 was handled by ResultCollector,
                # here we handle case 1.
                old_code = code
                code = get_worse_code(
                        code, RETURN_CODES.SUITE_TIMEOUT)
                if old_code != code:
                    return_message = 'Suite job timed out.'
                    logging.info('Upgrade return code from %s to %s '
                                 'because suite job has timed out.',
                                 RETURN_CODES.get_string(old_code),
                                 RETURN_CODES.get_string(code))
            if is_suite_timeout:
                logging.info('\nAttempting to diagnose pool: %s', options.pool)
                try:
                    # Add some jitter to make up for any latency in
                    # aborting the suite or checking for results.
                    cutoff = (job_timer.timeout_hours +
                              datetime_base.timedelta(hours=0.3))
                    rpc_helper.diagnose_pool(
                            options.board, options.pool, cutoff)
                except proxy.JSONRPCException:
                    # Diagnosis is best effort; the suite result stands.
                    logging.warning('Unable to diagnose suite abort.')

        # And output return message.
        if return_message:
            logging.info('Reason: %s', return_message)
            output_dict['return_message'] = return_message

        logging.info('\nOutput below this line is for buildbot consumption:')
        collector.output_buildbot_links()
    else:
        logging.info('Created suite job: %r', job_id)
        link = LogLink(options.name, instance_server,
                       '%s-%s' % (job_id, getpass.getuser()))
        logging.info(link.GenerateBuildbotLink())
        output_dict['return_message'] = '--no_wait specified; Exiting.'
        logging.info('--no_wait specified; Exiting.')
    return (code, output_dict)
Chris Masone24b80f12012-02-14 14:18:01 -08001624
Fang Dengdd20e452014-04-07 15:39:47 -07001625
def main():
    """Entry point.

    Parses and verifies the command line, runs the suite, and turns any
    exception raised along the way into a return code plus a
    'return_message' entry in the output dict. When options.json_dump is set,
    logging is silenced and the output dict is written to stdout as JSON
    between '#JSON_START#' and '#JSON_END#' markers.

    @returns: The run_suite return code (one of RETURN_CODES).
    """
    utils.verify_not_root_user()
    code = RETURN_CODES.OK
    output_dict = {}
    # Stays None if option parsing itself raises, so the reporting code
    # below can still run instead of failing on an unbound name.
    options = None

    try:
        parser, options, args = parse_options()
        # Silence the log when dumping outputs into json
        if options.json_dump:
            logging.disable(logging.CRITICAL)

        if not verify_options_and_args(options, args):
            parser.print_help()
            code = RETURN_CODES.INVALID_OPTIONS
        else:
            (code, output_dict) = main_without_exception_handling(options)
    except diagnosis_utils.BoardNotAvailableError as e:
        output_dict['return_message'] = 'Skipping testing: %s' % e.message
        code = RETURN_CODES.BOARD_NOT_AVAILABLE
        logging.info(output_dict['return_message'])
    except utils.TestLabException as e:
        output_dict['return_message'] = 'TestLabException: %s' % e
        code = RETURN_CODES.INFRA_FAILURE
        logging.exception(output_dict['return_message'])
    except Exception as e:
        output_dict['return_message'] = 'Unhandled run_suite exception: %s' % e
        code = RETURN_CODES.INFRA_FAILURE
        logging.exception(output_dict['return_message'])

    # Dump test outputs into json.
    output_dict['return_code'] = code
    if options is not None and options.json_dump:
        # Serialize only when the JSON is actually going to be emitted.
        output_json = json.dumps(output_dict, sort_keys=True)
        output_json_marked = '#JSON_START#%s#JSON_END#' % output_json.strip()
        sys.stdout.write(output_json_marked)

    logging.info('Will return from run_suite with status: %s',
                 RETURN_CODES.get_string(code))
    autotest_stats.Counter('run_suite.%s' %
                           RETURN_CODES.get_string(code)).increment()
    return code
Fang Dengfb4a9492014-09-18 17:52:06 -07001668
1669
# When executed as a script, run main() and use its return code as the
# process exit status.
if __name__ == "__main__":
    sys.exit(main())