blob: 1670ccd3aefda74eeb6cbd16723864ec7ab8fa0c [file] [log] [blame]
Chris Masone24b80f12012-02-14 14:18:01 -08001#!/usr/bin/python
2#
3# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
4# Use of this source code is governed by a BSD-style license that can be
5# found in the LICENSE file.
6
Fang Deng5a43be62014-05-07 17:17:04 -07007
"""Tool for running suites of tests and waiting for completion.

The desired test suite will be scheduled with autotest. By default,
this tool will block until the job is complete, printing a summary
at the end. Error conditions result in exceptions.

This is intended for use only with Chrome OS test suites that leverage the
dynamic suite infrastructure in server/cros/dynamic_suite.py.

This script exits with one of the following codes:
0 - OK: Suite finished successfully
1 - ERROR: Test(s) failed, or hits its own timeout
2 - WARNING: Test(s) raised a warning or passed on retry, none failed/timed out.
3 - INFRA_FAILURE: Infrastructure related issues, e.g.
    * Lab is down
    * Too many duts (defined as a constant) in repair failed status
    * Suite job issues, like bug in dynamic suite,
      user aborted the suite, lose a drone/all devservers/rpc server,
      0 tests ran, etc.
    * provision failed
      TODO(fdeng): crbug.com/413918, reexamine treating all provision
                   failures as INFRA failures.
4 - SUITE_TIMEOUT: Suite timed out, some tests ran,
    none failed by the time the suite job was aborted. This will cover,
    but not limited to, the following cases:
    * A devserver failure that manifests as a timeout
    * No DUTs available midway through a suite
    * Provision/Reset/Cleanup took longer time than expected for new image
    * A regression in scheduler tick time.
5 - BOARD_NOT_AVAILABLE: If there is no host for the requested board/pool.
6 - INVALID_OPTIONS: If options are not valid.
"""
40
Allen Li93f4db52016-09-14 14:44:59 -070041import argparse
42import ast
Allen Licc205492017-07-10 17:26:04 -070043import collections
Chris Masonecfa7efc2012-09-06 16:00:07 -070044from datetime import datetime
Allen Li93f4db52016-09-14 14:44:59 -070045from datetime import timedelta
Allen Licc205492017-07-10 17:26:04 -070046import functools
Allen Li93f4db52016-09-14 14:44:59 -070047import getpass
Xixuan Wu2af0b062019-03-27 11:58:56 -070048import json
Allen Li93f4db52016-09-14 14:44:59 -070049import logging
50import os
51import re
52import sys
53import time
Allen Li04afc8f2017-11-27 15:36:34 -080054import warnings
Chris Masonecfa7efc2012-09-06 16:00:07 -070055
Chris Masone24b80f12012-02-14 14:18:01 -080056import common
Allen Lie082ced2016-09-14 15:19:20 -070057from chromite.lib import buildbot_annotations as annotations
Xixuan Wu081c6de2019-03-26 10:50:17 -070058from chromite.lib import cros_build_lib
Xixuan Wuc4d33662019-03-18 14:07:15 -070059from chromite.lib import gs
60from chromite.lib import osutils
Allen Lie082ced2016-09-14 15:19:20 -070061
Aviv Keshet9eb5c892018-07-16 13:44:42 -070062from django.core import exceptions as django_exceptions
63
Xixuan Wuc4d33662019-03-18 14:07:15 -070064try:
65 from suite_scheduler import config_reader
66 from suite_scheduler import skylab
67except ImportError:
68 # For unittest
69 config_reader = None
70 skylab = None
71
Shuqian Zhao2fecacd2015-08-05 22:56:30 -070072from autotest_lib.client.common_lib import control_data
Fang Deng5a43be62014-05-07 17:17:04 -070073from autotest_lib.client.common_lib import error
Xixuan Wu888ee7a2018-04-24 10:27:27 -070074from autotest_lib.client.common_lib import global_config
J. Richard Barnette3cbd76b2013-11-27 12:11:25 -080075from autotest_lib.client.common_lib import priorities
Dan Shidfea3682014-08-10 23:38:40 -070076from autotest_lib.client.common_lib import time_utils
Prashanth B6285f6a2014-05-08 18:01:27 -070077from autotest_lib.client.common_lib.cros import retry
Prathmesh Prabhucd246f52018-01-03 13:45:48 -080078from autotest_lib.frontend.afe import rpc_client_lib
Prashanth B923ca262014-03-14 12:36:29 -070079from autotest_lib.frontend.afe.json_rpc import proxy
xixuanae791b12017-06-29 15:40:19 -070080from autotest_lib.server import site_utils
J. Richard Barnette3cbd76b2013-11-27 12:11:25 -080081from autotest_lib.server import utils
Xixuan Wu081c6de2019-03-26 10:50:17 -070082from autotest_lib.server.cros import provision
Chris Masone44e4d6c2012-08-15 14:25:53 -070083from autotest_lib.server.cros.dynamic_suite import constants
Chris Masoneb4935552012-08-14 12:05:54 -070084from autotest_lib.server.cros.dynamic_suite import frontend_wrappers
Prashanth B923ca262014-03-14 12:36:29 -070085from autotest_lib.server.cros.dynamic_suite import reporting_utils
Xixuan Wu7cc10e52018-04-25 17:04:51 -070086from autotest_lib.server.cros.dynamic_suite import suite_common
J. Richard Barnettee7b98bb2013-08-21 16:34:16 -070087from autotest_lib.server.cros.dynamic_suite import tools
Aviv Keshet9eb5c892018-07-16 13:44:42 -070088try:
89 from autotest_lib.site_utils import diagnosis_utils
90except django_exceptions.ImproperlyConfigured as e:
91 if 'Error loading MySQLdb module: libmariadbclient' in str(e):
92 logging.error('Unable to import a necessary MySQLdb module. This is '
93 'commonly caused by running a command inside[outside] '
94 'of the chroot but having autotest utility packages '
95 'that were build outside[inside] the chroot. '
96 'Please re-run utils/build_externals.py inside[outside] '
97 'of the chroot accordingly.')
98 raise
Xixuan Wu07224482019-04-11 18:08:19 -070099
100from autotest_lib.site_utils import paygen
Xixuan Wu888ee7a2018-04-24 10:27:27 -0700101from autotest_lib.site_utils import run_suite_common
MK Ryu977a9752014-10-21 11:58:09 -0700102
# Shared handle on the global autotest configuration.
CONFIG = global_config.global_config

# Values read from the global config at import time.
_DEFAULT_AUTOTEST_INSTANCE = CONFIG.get_config_value(
        'SERVER', 'hostname', type=str)
_URL_PATTERN = CONFIG.get_config_value('CROS', 'log_url_pattern', type=str)
_ENABLE_RUN_SUITE_TRAMPOLINE = CONFIG.get_config_value(
        'CROS', 'enable_run_suite_trampoline', type=bool, default=False)

_SKYLAB_TOOL = '/opt/infra-tools/skylab'
_SKYLAB_SERVICE_ACCOUNT = '/creds/service_accounts/skylab_swarming.json'
_MIGRATION_CONFIG_FILE = 'migration_config.ini'
_MIGRATION_CONFIG_BUCKET = 'suite-scheduler.google.com.a.appspot.com'
_TRAMPOLINE_CONFIG = 'gs://%s/%s' % (_MIGRATION_CONFIG_BUCKET,
                                     _MIGRATION_CONFIG_FILE)

# Minimum RPC timeout setting for calls expected to take long time, e.g.,
# create_suite_job. If default socket time (socket.getdefaulttimeout()) is
# None or greater than this value, the default will be used.
# The value here is set to be the same as the timeout for the RetryingAFE object
# so long running RPCs can wait long enough before being aborted.
_MIN_RPC_TIMEOUT = 600

# Number of days back to search for existing job.
_SEARCH_JOB_MAX_DAYS = 14

_PROVISION_SUITE = 'provision'

# Only special tasks can have a priority lower than 50.
_SKYLAB_PRIORITY_MIN = 50
# Mandated by Swarming.
_SKYLAB_PRIORITY_MAX = 255
134
Fang Deng5a43be62014-05-07 17:17:04 -0700135
@functools.total_ordering
class _ReturnResult(object):
    """Represents overall result of run_suite operation.

    _ReturnResult instances sort based on priority (the order in
    _RETURN_RESULTS).

    Furthermore, _ReturnResult instances can be combined by bitwise or
    ("union"), which returns the instance with the higher priority
    between the two (the instance with higher priority is a "superset"
    of the other).

    Do not create new instances of this; use _RETURN_RESULTS instead.
    """

    def __init__(self, return_code, message):
        """Initialize instance.

        @param return_code: return code associated with this result.
        @param message: message describing the result; may be empty.
        """
        self.return_code = return_code
        self.message = message

    def __repr__(self):
        return '<{cls} {key}, {this.return_code}, {this.message}>'.format(
                cls=type(self).__name__,
                key=self._getkey(),
                this=self)

    def __gt__(self, other):
        if isinstance(other, type(self)):
            return self._getkey() > other._getkey()
        else:
            return NotImplemented

    def __eq__(self, other):
        if isinstance(other, type(self)):
            return (self.return_code == other.return_code
                    and self.message == other.message)
        else:
            return NotImplemented

    def __ne__(self, other):
        # Python 2 does not derive != from __eq__, and total_ordering does
        # not supply it either, so define it explicitly to keep == and !=
        # consistent with each other.
        equal = self.__eq__(other)
        if equal is NotImplemented:
            return NotImplemented
        return not equal

    def __hash__(self):
        # Hash over the same fields that __eq__ compares.
        return hash(self.return_code) ^ hash(self.message)

    def __or__(self, other):
        """Return whichever of the two results has the higher priority.

        When neither is strictly greater, |other| is returned.
        """
        if isinstance(other, type(self)):
            if self > other:
                return self
            else:
                return other
        else:
            return NotImplemented

    def _getkey(self):
        """Return sort key."""
        # The key is the position in _RETURN_RESULTS_LIST; list.index()
        # locates the entry via __eq__.
        return _RETURN_RESULTS_LIST.index(self)

    def suite_result(self, output_dict=None):
        """Make a SuiteResult using this _ReturnResult.

        @param output_dict: output_dict to merge into SuiteResult. The
                            caller's dict is copied, not modified.

        @returns: run_suite_common.SuiteResult built from this result's
                  return code and the merged dict.
        """
        if output_dict is None:
            output_dict = dict()
        else:
            output_dict = output_dict.copy()
        if self.message:
            output_dict['return_message'] = self.message
        return run_suite_common.SuiteResult(self.return_code, output_dict)
Allen Licc205492017-07-10 17:26:04 -0700202
203
# Known results keyed by name. Insertion order matters: it is the priority
# order that _ReturnResult._getkey() reads through _RETURN_RESULTS_LIST.
_RETURN_RESULTS = collections.OrderedDict([
    ('ok', _ReturnResult(run_suite_common.RETURN_CODES.OK, '')),

    ('test_warning', _ReturnResult(
        run_suite_common.RETURN_CODES.WARNING, 'Test job raised warning.')),
    ('suite_warning', _ReturnResult(
        run_suite_common.RETURN_CODES.WARNING, 'Suite job raised warning.')),
    ('test_retry', _ReturnResult(
        run_suite_common.RETURN_CODES.WARNING, 'Tests were retried.')),

    ('test_aborted_prestart', _ReturnResult(
        run_suite_common.RETURN_CODES.SUITE_TIMEOUT,
        'Tests were aborted before running; suite must have timed out.')),
    # This really indicates a user action or an infra failure. But, suite
    # timeouts cause similar failures in the individual tests, so we must
    # classify these lower than suite_timeout. In case of a suite_timeout, the
    # result from the suite job will promote the result to suite_timeout.
    ('test_aborted_mystery',
     _ReturnResult(
         run_suite_common.RETURN_CODES.SUITE_TIMEOUT,
         'Tests were aborted after running, but before timeout; '
         'Test was manually aborted or parsing results failed: '
         'crbug.com/796348.')),
    ('suite_timeout', _ReturnResult(
        run_suite_common.RETURN_CODES.SUITE_TIMEOUT, 'Suite job timed out.')),

    ('test_views_missing', _ReturnResult(
        run_suite_common.RETURN_CODES.INFRA_FAILURE, 'No test views found.')),
    ('suite_failed', _ReturnResult(
        run_suite_common.RETURN_CODES.INFRA_FAILURE, 'Suite job failed.')),
    ('provision_failed', _ReturnResult(
        run_suite_common.RETURN_CODES.INFRA_FAILURE, 'Provisioning failed.')),

    ('test_failure', _ReturnResult(
        run_suite_common.RETURN_CODES.ERROR, 'Tests failed.')),
])
_RETURN_RESULTS_LIST = list(_RETURN_RESULTS.values())
Simran Basi22aa9fe2012-12-07 16:37:09 -0800241
Chris Masonedfa0beba2012-03-19 11:41:47 -0700242
def bool_str(x):
    """Boolean string type for option arguments.

    Only the exact strings 'True' and 'False' are accepted.

    @param x: string representation of boolean value.

    @returns: True for 'True', False for 'False'.
    @raises argparse.ArgumentTypeError: if x is any other value.

    """
    if x == 'True':
        return True
    elif x == 'False':
        return False
    else:
        raise argparse.ArgumentTypeError(
                '%s is not one of True or False' % (x,))
256
257
def _get_priority_value(x):
    """Convert a priority representation to its int value.

    Priorities can be described either by an int value (possibly as a string)
    or a name string.  This function coerces both forms to an int value.

    This function is intended for casting command line arguments during
    parsing.

    @param x: priority value as an int, int string, or name string

    @returns: int value of priority
    @raises argparse.ArgumentTypeError: if x is neither an integer nor a
            name that priorities.Priority.get_value() accepts.
    """
    try:
        return int(x)
    except ValueError:
        # Not an integer literal; fall back to a lookup by name.
        try:
            return priorities.Priority.get_value(x)
        except AttributeError:
            raise argparse.ArgumentTypeError(
                    'Unknown priority level %s.  Try one of %s.'
                    % (x, ', '.join(priorities.Priority.names)))
280
281
def skylab_priority_for(afe_priority):
    """Convert AFE priority to Skylab priority.

    The result is 260 - 3 * afe_priority, clamped to the range
    [_SKYLAB_PRIORITY_MIN, _SKYLAB_PRIORITY_MAX].

    @param afe_priority: An integer (or int string) as returned by
                         _get_priority_value().

    @returns: An integer representing Skylab priority.
    """
    skylab_priority = 260 - 3 * int(afe_priority)
    skylab_priority = min(skylab_priority, _SKYLAB_PRIORITY_MAX)
    skylab_priority = max(skylab_priority, _SKYLAB_PRIORITY_MIN)
    return skylab_priority
295
296
def make_parser():
    """Make ArgumentParser instance for run_suite.py.

    @returns: argparse.ArgumentParser with every run_suite option registered.
    """
    parser = argparse.ArgumentParser(
            usage="%(prog)s [options]")
    parser.add_argument("-b", "--board", dest="board")
    parser.add_argument(
        "--model",
        help="The device model to run tests against. For non-unified "
             "builds, model and board are synonymous, but board is more "
             "accurate in some cases. Only pass this option if your build "
             "is a unified build.",
    )
    parser.add_argument("-i", "--build", dest="build")
    parser.add_argument(
        "-w", "--web", dest="web", default=None,
        help="Address of a webserver to receive suite requests.")
    parser.add_argument(
        '--cheets_build', dest='cheets_build', default=None,
        help='ChromeOS Android build to be installed on dut.')
    parser.add_argument(
        '--firmware_rw_build', dest='firmware_rw_build', default=None,
        help='Firmware build to be installed in dut RW firmware.')
    parser.add_argument(
        '--firmware_ro_build', dest='firmware_ro_build', default=None,
        help='Firmware build to be installed in dut RO firmware.')
    parser.add_argument(
        '--test_source_build', dest='test_source_build', default=None,
        help=('Build that contains the test code, '
              'e.g., it can be the value of `--build`, '
              '`--firmware_rw_build` or `--firmware_ro_build` '
              'arguments. Default is None, that is, use the test '
              'code from `--build` (CrOS image)'))
    # This should just be a boolean flag, but the autotest "proxy" code
    # can't handle flags that don't take arguments.
    parser.add_argument(
        "-n", "--no_wait", dest="no_wait", default=False, type=bool_str,
        help='Must pass "True" or "False" if used.')
    # If you really want no pool, --pool="" will do it. USE WITH CARE.
    parser.add_argument("-p", "--pool", dest="pool", default="suites")
    parser.add_argument("-s", "--suite_name", dest="name")
    parser.add_argument("-a", "--afe_timeout_mins", type=int,
                        dest="afe_timeout_mins", default=30)
    parser.add_argument("-t", "--timeout_mins", type=int,
                        dest="timeout_mins", default=1440)
    parser.add_argument("-x", "--max_runtime_mins", type=int,
                        dest="max_runtime_mins", default=1440)
    parser.add_argument("-d", "--delay_sec", type=int,
                        dest="delay_sec", default=10)
    parser.add_argument("-m", "--mock_job_id", dest="mock_job_id",
                        help="Attach to existing job id for already running "
                             "suite, and creates report.")
    # NOTE(akeshet): This looks similar to --no_wait, but behaves differently.
    # --no_wait is passed in to the suite rpc itself and affects the suite,
    # while this does not.
    parser.add_argument("-c", "--create_and_return", dest="create_and_return",
                        action="store_true",
                        help="Create the suite and print the job id, then "
                             "finish immediately.")
    parser.add_argument("-u", "--num", dest="num", type=int, default=None,
                        help="Deprecated, does nothing.")
    # Same boolean flag issue applies here.
    parser.add_argument(
        "-f", "--file_bugs", dest="file_bugs", default=False, type=bool_str,
        help=('File bugs on test failures. Must pass "True" or '
              '"False" if used.'))
    parser.add_argument("-l", "--bypass_labstatus", dest="bypass_labstatus",
                        action="store_true", help='Bypass lab status check.')
    # We allow either a number or a string for the priority.  This way, if you
    # know what you're doing, one can specify a custom priority level between
    # other levels.
    parser.add_argument("-r", "--priority", dest="priority",
                        type=_get_priority_value,
                        default=priorities.Priority.DEFAULT,
                        action="store",
                        help="Priority of suite. Either numerical value, or "
                             "one of (" + ", ".join(priorities.Priority.names)
                             + ").")
    parser.add_argument(
        '--retry', dest='retry', default=False, type=bool_str, action='store',
        help='Enable test retry.  Must pass "True" or "False" if used.')
    # The trailing space after "retries" keeps the two adjacent literals
    # from running together in the rendered help text.
    parser.add_argument('--max_retries', dest='max_retries', default=None,
                        type=int, action='store', help='Maximum retries '
                        'allowed at suite level. No limit if not specified.')
    parser.add_argument('--minimum_duts', dest='minimum_duts', type=int,
                        default=0, action='store',
                        help='Check that the pool has at least such many '
                             'healthy machines, otherwise suite will not run. '
                             'Default to 0.')
    parser.add_argument('--suite_min_duts', dest='suite_min_duts', type=int,
                        default=0, action='store',
                        help='Preferred minimum number of machines. Scheduler '
                             'will prioritize on getting such many machines for '
                             'the suite when it is competing with another suite '
                             'that has a higher priority but already got minimum '
                             'machines it needs. Default to 0.')
    parser.add_argument("--suite_args", dest="suite_args",
                        type=ast.literal_eval,
                        default=None, action="store",
                        help="A dict of args passed to the suite control file.")
    parser.add_argument("--suite_args_json", dest="suite_args_json",
                        type=json.loads,
                        default=None, action="store",
                        help="A json-encoded string representation of args to "
                             "passed to the suite control file. Overrides "
                             "suite_args if specified.")
    parser.add_argument('--offload_failures_only',
                        dest='offload_failures_only', type=bool_str,
                        action='store', default=False,
                        help='Only enable gs_offloading for failed tests. '
                             'Successful tests will be deleted. Must pass "True"'
                             ' or "False" if used.')
    parser.add_argument('--use_suite_attr', dest='use_suite_attr',
                        action='store_true', default=False,
                        help='Advanced. Run the suite based on ATTRIBUTES of '
                             'control files, rather than SUITE.')
    parser.add_argument('--json_dump', dest='json_dump', action='store_true',
                        default=False,
                        help='Dump the output of run_suite to stdout as json; '
                             'silence other output.')
    parser.add_argument('--json_dump_postfix', dest='json_dump_postfix',
                        action='store_true',
                        help='Dump the output of run_suite to stdout as json; '
                             'do not silence other logging. Similar to '
                             '--json_dump, the json payload will be wrapped in '
                             'a tag to differentiate it from logging.')
    parser.add_argument(
        '--run_prod_code', dest='run_prod_code',
        action='store_true', default=False,
        help='Run the test code that lives in prod aka the test '
             'code currently on the lab servers.')
    parser.add_argument(
        '--delay_minutes', type=int, default=0,
        help=('Delay the creation of test jobs for a given '
              'number of minutes. This argument can be used to '
              'force provision jobs being delayed, which helps '
              'to distribute loads across devservers.'))
    parser.add_argument(
        '--skip_duts_check', dest='skip_duts_check', action='store_true',
        default=False, help='If True, skip minimum available DUTs check')
    parser.add_argument(
        '--job_keyvals', dest='job_keyvals', type=ast.literal_eval,
        action='store', default=None,
        help='A dict of job keyvals to be inject to suite control file')
    parser.add_argument(
        '--test_args', dest='test_args', type=ast.literal_eval,
        action='store', default=None,
        help=('A dict of args passed all the way to each individual test that '
              'will be actually ran.'))
    parser.add_argument(
        '--require_logfile', action='store_true',
        help=('Stream logs of run_suite.py to a local file named '
              'run_suite-<build name>.log.'))

    # Used for monitoring purposes, to measure no-op swarming proxy latency.
    parser.add_argument('--do_nothing', action='store_true',
                        help=argparse.SUPPRESS)

    # Used when lab/job status checking is needed. Currently its only user is
    # suite scheduler v2.
    parser.add_argument(
        '--pre_check', action='store_true',
        help=('Check lab and job status before kicking off a suite. Used by '
              'suite scheduler v2.'))

    return parser
Chris Masone24b80f12012-02-14 14:18:01 -0800462
463
def verify_and_clean_options(options):
    """Verify the validity of options and normalize them in place.

    On success this also modifies |options|: the deprecated `num`
    attribute is deleted, `test_source_build` defaults to `build`, and
    `child_dependencies` and `dependencies` are filled in.

    @param options: The parsed options to verify.

    @returns: True if verification passes, False otherwise.

    """
    # Messages use the single-argument parenthesised form of print so the
    # block behaves the same under the Python 2 print statement and the
    # print() function.
    if options.mock_job_id and (
            not options.build or not options.name or not options.board):
        print('When using -m, need to specify build, board and suite '
              'name which you have used for creating the original job')
        return False
    else:
        if not options.build:
            print('Need to specify which build to use')
            return False
        if not options.board:
            print('Need to specify board')
            return False
        if not options.name:
            print('Need to specify suite name')
            return False
    if options.num is not None:
        warnings.warn('-u/--num option is deprecated; it does nothing.')
    del options.num
    if not options.retry and options.max_retries is not None:
        print('max_retries can only be used with --retry=True')
        return False
    if options.use_suite_attr and options.suite_args is not None:
        print('The new suite control file cannot parse the suite_args: %s. '
              'Please do not specify any suite_args here.'
              % options.suite_args)
        return False
    if options.no_wait and options.retry:
        # NOTE(review): this only prints a message and verification still
        # continues; presumably it is meant as a warning -- confirm before
        # turning it into a failure.
        print('Test retry is not available when using --no_wait=True')
    if options.json_dump and options.json_dump_postfix:
        print('--json_dump and --json_dump_postfix are mutually exclusive')
        return False
    # Default to use the test code in CrOS build.
    if not options.test_source_build and options.build:
        options.test_source_build = options.build
    options.child_dependencies = _make_child_dependencies(options)
    base_dependencies = ('board:%s' % options.board,
                         'pool:%s' % options.pool)
    options.dependencies = base_dependencies + options.child_dependencies
    return True
510
511
def change_options_for_suite_attr(options):
    """Change options to be prepared to run the suite_attr_wrapper.

    If specify 'use_suite_attr' from the cmd line, it indicates to run the
    new style suite control file, suite_attr_wrapper. Then, change the
    options.name to 'suite_attr_wrapper', change the options.suite_args to
    include the arguments needed by suite_attr_wrapper.

    Any existing options.suite_args value is replaced.

    @param options: The verified options.

    @returns: The changed options (the same object, modified in place).

    """
    # Convert the suite_name to attribute boolean expression.
    if isinstance(options.name, str):
        attr_filter_val = 'suite:%s' % options.name
    else:
        # Otherwise treat the name as an iterable of suite names and OR
        # them together.
        attr_filter_val = ' or '.join(['suite:%s' % x for x in options.name])

    # suite_args becomes a dict of arguments for suite_attr_wrapper.
    options.suite_args = {'attr_filter': attr_filter_val}
    options.name = 'suite_attr_wrapper'

    return options
539
540
class TestResult(object):

    """Represents the result of a TestView."""

    def __init__(self, test_view, retry_count=0):
        """Initialize instance.

        @param test_view: TestView instance.
        @param retry_count: Retry count for test.  Optional.
        """
        self.name = test_view.get_testname()
        self.status = test_view['status']
        self.reason = test_view['reason']
        self.retry_count = retry_count

    # Statuses with a dedicated label; any other status is shown as failed
    # (see _pretty_status).
    _PRETTY_STATUS_MAP = {
        'GOOD': '[ PASSED ]',
        'TEST_NA': '[ INFO ]',
    }

    @property
    def _pretty_status(self):
        """Pretty status string."""
        return self._PRETTY_STATUS_MAP.get(self.status, '[ FAILED ]')

    def log_using(self, log_function, name_column_width):
        """Log the test result using the given log function.

        Always logs one summary line. Adds a status/reason line when the
        status is not 'GOOD', and a retry line when retry_count is positive.

        @param log_function: Log function to use.  Example: logging.info
        @param name_column_width: Width of name column for formatting.
        """
        padded_name = self.name.ljust(name_column_width)
        log_function('%s%s', padded_name, self._pretty_status)
        if self.status != 'GOOD':
            log_function('%s %s: %s', padded_name, self.status, self.reason)
        if self.retry_count > 0:
            log_function('%s retry_count: %s', padded_name, self.retry_count)
Chris Masone24b80f12012-02-14 14:18:01 -0800578
Fang Dengdd20e452014-04-07 15:39:47 -0700579
def get_original_suite_name(suite_name, suite_args):
    """Get the original suite name when running suite_attr_wrapper.

    @param suite_name: the name of the suite launched in afe. When it is
                       suite_attr_wrapper, the suite that actually running is
                       specified in the suite_args.
    @param suite_args: dict of suite args from argument parsing. May be
                       None, which is treated as an empty dict.

    @returns: the original suite name: the first 'suite:<name>' term found
              in suite_args['attr_filter'], or suite_name itself if there
              is no such term or the suite is not suite_attr_wrapper.

    """
    if suite_name == 'suite_attr_wrapper':
        prefix = 'suite:'
        # Guard against suite_args being None so that a missing dict falls
        # back to suite_name instead of raising AttributeError.
        attrs = (suite_args or {}).get('attr_filter', '')
        # Split the boolean expression on parentheses and spaces, keeping
        # only the 'suite:<name>' terms with their prefix stripped.
        suite_list = [term[len(prefix):]
                      for term in re.split(r'[() ]', attrs)
                      if term and term.startswith(prefix)]
        return suite_list[0] if suite_list else suite_name
    return suite_name
597
598
class LogLink(object):
    """Everything needed to emit links for one job into the logs.

    An instance always knows the URL of the job's log files.  When bug
    information is supplied at construction time it can additionally
    produce a link to the bug that was filed for the job's failure.

    @var anchor  The link text.
    @var url  The link url.
    @var bug_id  Id of a bug to link to, or None.
    @var bug_count  Number of reports of the bug, or None.
    @var reason  The failure reason passed at construction time.
    @var retry_count  How many times the test has been retried.
    @var testname  The test name passed at construction time.
    @var sponge_url  The Sponge result url passed at construction time.
    """

    # A list of tests that don't get retried so skip the dashboard.
    _SKIP_RETRY_DASHBOARD = ['provision']

    _BUG_LINK_PREFIX = 'Auto-Bug'
    _LOG_LINK_PREFIX = 'Test-Logs'


    def __init__(self, anchor, server, job_string, bug_info=None, reason=None,
                 retry_count=0, testname=None, sponge_url=None):
        """Initialize the LogLink by generating the log URL.

        @param anchor  The link text.
        @param server  The hostname of the server this suite ran on.
        @param job_string  The job whose logs we'd like to link to.
        @param bug_info  Info about the bug, if one was filed: a pair of
                         (bug id, bug count).
        @param reason  A string representing the reason of failure if any.
        @param retry_count  How many times the test has been retried.
        @param testname  Optional Arg that supplies the testname.
        @param sponge_url  url to Sponge result.
        """
        log_url = _URL_PATTERN % (rpc_client_lib.add_protocol(server),
                                  job_string)
        self.anchor = anchor
        self.url = log_url
        self.reason = reason
        self.retry_count = retry_count
        self.testname = testname
        self.sponge_url = sponge_url
        # A falsy bug_info means that no bug is associated with the link.
        self.bug_id, self.bug_count = bug_info if bug_info else (None, None)


    @property
    def bug_url(self):
        """URL of associated bug, or None when there is no bug id."""
        if not self.bug_id:
            return None
        return reporting_utils.link_crbug(self.bug_id)


    @property
    def _bug_count_text(self):
        """Return bug count as human friendly text."""
        count = self.bug_count
        if count is None:
            return 'unknown number of reports'
        if count == 1:
            return 'new report'
        return '%s reports' % count


    def GenerateBuildbotLinks(self):
        """Generate links formatted to meet buildbot expectations.

        The link to the bug is produced first, and only when a bug is
        associated with this link.  The link to the job logs is always
        produced.

        @return A generator of links formatted for the buildbot log annotator.
        """
        has_bug = bool(self.bug_url)
        if has_bug:
            yield self._get_link_to_bug()
        yield self._get_link_to_job_logs()


    def _get_link_to_bug(self):
        """Return buildbot link to bug.

        @return A link formatted for the buildbot log annotator.
        """
        # The bug count always comes last in the anchor text.
        details = self._get_info_strings() + [self._bug_count_text]
        return annotations.StepLink(
                self._format_anchor_text(self._BUG_LINK_PREFIX, details),
                self.bug_url)


    def _get_link_to_job_logs(self):
        """Return buildbot link to job logs.

        @return A link formatted for the buildbot log annotator.
        """
        return annotations.StepLink(
                self._format_anchor_text(self._LOG_LINK_PREFIX,
                                         self._get_info_strings()),
                self.url)


    def _get_info_strings(self):
        """Return a list of info strings for _format_anchor_text().

        The list holds the retry count (only when positive) followed by
        the failure reason (only when set).
        """
        retry_info = (['retry_count: %d' % self.retry_count]
                      if self.retry_count > 0 else [])
        reason_info = [self.reason] if self.reason else []
        return retry_info + reason_info


    def _format_anchor_text(self, prefix, info_strings):
        """Format anchor text given a prefix and info strings.

        @param prefix  The prefix of the anchor text.
        @param info_strings  Iterable of strings.
        @return An anchor text made of the prefix, the stripped anchor and
                the comma separated info strings.
        """
        stripped_anchor = self.anchor.strip()
        joined_info = ', '.join(info_strings)
        return '[{prefix}]: {anchor}: {info}'.format(
                prefix=prefix, anchor=stripped_anchor, info=joined_info)


    @property
    def text_link(self):
        """Link to the job's logs, for consumption by a human.

        @return A link formatted for human readability.
        """
        parts = (self.anchor, self.url)
        return '%s %s' % parts


    def GenerateRetryLink(self):
        """Generate a link to the retry dashboard.

        No dashboard link is produced at the moment, so every path
        returns None.

        @return None.
        """
        skipped = (not self.testname
                   or self.testname in self._SKIP_RETRY_DASHBOARD)
        if skipped:
            return None

        # TODO(xixuan): Return the right flake dashboard later.
        return None


    def GenerateHistoryLink(self):
        """Generate a link to the test history dashboard.

        @return A link formatted for the buildbot log annotator, or None
                when there is no test name or the test is listed in
                _SKIP_RETRY_DASHBOARD.
        """
        name = self.testname
        if not name or name in self._SKIP_RETRY_DASHBOARD:
            return None
        history_url = reporting_utils.link_test_history(name)
        return annotations.StepLink(text='[Test-History]: %s' % name,
                                    url=history_url)
752
Simran Basi7203d4e2015-02-03 15:50:18 -0800753
class Timings(object):
    """Timings for important events during a suite.

    All timestamps are datetime.datetime objects.

    @var suite_job_id: the afe job id of the suite job for which
                       we are recording the timing for.
    @var download_start_time: the time the devserver starts staging
                              the build artifacts. Recorded in
                              create_suite_job.
    @var payload_end_time: the time when the artifacts only necessary to
                           start installing images onto DUT's are staged.
                           Recorded in create_suite_job.
    @var artifact_end_time: the remaining artifacts are downloaded after we
                            kick off the reimaging job, at which point we
                            record artifact_end_time. Recorded in
                            dynamic_suite.py.
    @var suite_start_time: the time the suite started.
    @var tests_start_time: the time the first test started running.
    @var tests_end_time: the time the last test finished running.
    """

    def __init__(self, suite_job_id):
        """Create an empty record for the given suite job.

        @param suite_job_id: the afe job id of the suite job.
        """
        self.suite_job_id = suite_job_id

        # Timings related to staging artifacts on devserver.
        self.download_start_time = None
        self.payload_end_time = None
        self.artifact_end_time = None

        # The test_start_time, but taken off the view that corresponds to the
        # suite instead of an individual test.
        self.suite_start_time = None

        # Earliest and Latest tests in the set of TestViews passed to us.
        self.tests_start_time = None
        self.tests_end_time = None


    def RecordTiming(self, view):
        """Given a test report view, extract and record pertinent time info.

        The view for the suite job provides the suite start time; every
        other view widens the [tests_start_time, tests_end_time] window.
        A view that belongs to the suite job and exposes 'job_keyvals'
        also provides the artifact staging timestamps.

        If timestamps are unavailable, datetime.datetime.min/max will be used.

        @param view: A TestView object.
        """
        started_text = view['test_started_time']
        start_candidate = (time_utils.time_string_to_datetime(started_text)
                           if started_text else datetime.min)
        finished_text = view['test_finished_time']
        end_candidate = (time_utils.time_string_to_datetime(finished_text)
                         if finished_text else datetime.max)

        if view.get_testname() != TestView.SUITE_JOB:
            self._UpdateFirstTestStartTime(start_candidate)
            self._UpdateLastTestEndTime(end_candidate)
        else:
            self.suite_start_time = start_candidate

        if view['afe_job_id'] != self.suite_job_id:
            return
        if 'job_keyvals' not in view:
            return

        keyvals = view['job_keyvals']
        staging_keys = (
                ('download_start_time', constants.DOWNLOAD_STARTED_TIME),
                ('payload_end_time', constants.PAYLOAD_FINISHED_TIME),
                ('artifact_end_time', constants.ARTIFACT_FINISHED_TIME),
        )
        for attribute, keyval_name in staging_keys:
            setattr(self, attribute,
                    time_utils.time_string_to_datetime(
                            keyvals.get(keyval_name),
                            handle_type_error=True))


    def _UpdateFirstTestStartTime(self, candidate):
        """Update self.tests_start_time, iff candidate is an earlier time.

        @param candidate: a datetime.datetime object.
        """
        earliest = self.tests_start_time
        if earliest and not candidate < earliest:
            return
        self.tests_start_time = candidate


    def _UpdateLastTestEndTime(self, candidate):
        """Update self.tests_end_time, iff candidate is a later time.

        @param candidate: a datetime.datetime object.
        """
        latest = self.tests_end_time
        if latest and not candidate > latest:
            return
        self.tests_end_time = candidate


    def __str__(self):
        """Return a multi-line, human readable report of all timings."""
        template = ('\n'
                    'Suite timings:\n'
                    'Downloads started at %s\n'
                    'Payload downloads ended at %s\n'
                    'Suite started at %s\n'
                    'Artifact downloads ended (at latest) at %s\n'
                    'Testing started at %s\n'
                    'Testing ended at %s\n')
        values = (self.download_start_time,
                  self.payload_end_time,
                  self.suite_start_time,
                  self.artifact_end_time,
                  self.tests_start_time,
                  self.tests_end_time)
        return template % values
863
864
def instance_for_pool(pool_name):
    """
    Return the hostname of the server that should be used to service a suite
    for the specified pool.

    The hostname is read with CONFIG.get_config_value() using
    'POOL_INSTANCE_SHARDING' and the pool name; _DEFAULT_AUTOTEST_INSTANCE
    is passed as the default.

    @param pool_name: The pool (without 'pool:') to schedule the suite
                      against.
    @return: The correct host that should be used to service this suite run.
    """
    instance_server = CONFIG.get_config_value(
            'POOL_INSTANCE_SHARDING', pool_name,
            default=_DEFAULT_AUTOTEST_INSTANCE)
    return instance_server
876
877
class TestView(object):
    """Represents a test view and provides a set of helper functions.

    A TestView wraps one tko test view dictionary together with the afe
    job it is associated with.  It can be indexed and iterated like the
    wrapped dictionary.

    @var SUITE_JOB: The test name reported for the suite job's own
                    SERVER_JOB view.
    """


    SUITE_JOB = 'Suite job'

    # Prefixes of view['test_name'] that mark a view as being for the job
    # itself (SERVER_JOB/CLIENT_JOB) rather than for an actual test.
    _JOB_VIEW_PREFIXES = ('SERVER_JOB', 'CLIENT_JOB')


    def __init__(self, view, afe_job, suite_name, build, user,
                 solo_test_run=False):
        """Init a TestView object representing a tko test view.

        @param view: A dictionary representing a tko test view.
        @param afe_job: An instance of frontend.afe.models.Job
                        representing the job that kicked off the test.
        @param suite_name: The name of the suite
                           that the test belongs to.
        @param build: The build for which the test is run.
        @param user: The user for which the test is run.
        @param solo_test_run: This is a solo test run not part of a suite.
        """
        self.view = view
        self.afe_job = afe_job
        self.suite_name = suite_name
        self.build = build
        self.is_suite_view = afe_job.parent_job is None and not solo_test_run
        # This is the test name that will be shown in the output.
        self.testname = None
        self.user = user

        # The case that a job was aborted before it got a chance to run
        # usually indicates suite has timed out (unless aborted by user).
        # In this case, the abort reason will be None.
        # Update the reason with proper information.
        if (self.is_relevant_suite_view() and
                not self.get_testname() == self.SUITE_JOB and
                self.view['status'] == 'ABORT' and
                not self.view['reason']):
            self.view['reason'] = 'Timed out, did not run.'


    def __getitem__(self, key):
        """Overload __getitem__ so that we can still use []

        @param key: A key of the tko test view.

        @returns: The value of an attribute in the view.

        """
        return self.view[key]


    def __iter__(self):
        """Overload __iter__ so that it supports 'in' operator."""
        return iter(self.view)


    def _is_job_view(self):
        """Return whether the view is for SERVER_JOB or CLIENT_JOB.

        @returns: True if view['test_name'] starts with 'SERVER_JOB' or
                  'CLIENT_JOB'. False otherwise.
        """
        return self.view['test_name'].startswith(self._JOB_VIEW_PREFIXES)


    def _get_raw_testname(self):
        """Return the test name before the build/suite prefix is removed.

        For SERVER_JOB and CLIENT_JOB views the job name is prepended, so
        that views of different jobs can be told apart. Every other view
        uses view['test_name'] unchanged.

        @returns: The un-normalized test name.
        """
        if self._is_job_view():
            # Append job name as a prefix for SERVER_JOB and CLIENT_JOB
            return '%s_%s' % (self.view['job_name'], self.view['test_name'])
        return self.view['test_name']


    def get_testname(self):
        """Get test name that should be shown in the output.

        Formalize the test_name we got from the test view.

        Remove 'build/suite' prefix if any.

        If one runs a test in control file via the following code,
            job.runtest('my_Test', tag='tag')
        for most of the cases, view['test_name'] would look like 'my_Test.tag'.
        If this is the case, this method will just return the original
        test name, i.e. 'my_Test.tag'.

        There are four special cases.
        1) A test view is for the suite job's SERVER_JOB.
           In this case, this method will return 'Suite job'.

        2) A test view is of a child job or a solo test run not part of a
           suite, and for a SERVER_JOB or CLIENT_JOB.
           In this case, we will take the job name, remove the build/suite
           prefix from the job name, and append the rest to 'SERVER_JOB'
           or 'CLIENT_JOB' as a prefix. So the names returned by this
           method will look like:
             'dummy_Pass_SERVER_JOB'
             'dummy_Fail_SERVER_JOB'

        3) A test view is of a suite job and its status is ABORT.
           In this case, the view['test_name'] is the child job's name.
           For instance,
             'lumpy-release/R35-5712.0.0/dummy/dummy_Pass'
             'lumpy-release/R35-5712.0.0/dummy/dummy_Fail'
           The above names will be converted to the following:
             'dummy_Pass'
             'dummy_Fail'

        4) A test view is of a suite job and its status is TEST_NA.
           In this case, the view['test_name'] is the NAME field of the control
           file. For instance,
             'dummy_Pass'
             'dummy_Fail'
           This method will not modify these names.

        The result is cached in self.testname after the first call.

        @returns: Test name after normalization.

        """
        if self.testname is not None:
            return self.testname

        if (self.is_suite_view and
                self.view['test_name'].startswith('SERVER_JOB')):
            # Rename suite job's SERVER_JOB to 'Suite job'.
            self.testname = self.SUITE_JOB
            return self.testname

        # Remove the build and suite name from testname if any.
        self.testname = tools.get_test_name(
                self.build, self.suite_name, self._get_raw_testname())
        return self.testname


    def is_relevant_suite_view(self):
        """Checks whether this is a suite view we should care about.

        A view is relevant when it is the suite job view itself, or when
        it belongs to the suite job, is not a CLIENT_JOB view and has no
        'subdir'.

        @returns: True if it is relevant. False otherwise.
        """
        return (self.get_testname() == self.SUITE_JOB or
                (self.is_suite_view and
                 not self.view['test_name'].startswith('CLIENT_JOB') and
                 not self.view['subdir']))


    def is_test(self):
        """Return whether the view is for an actual test.

        @returns True if the view is for an actual test.
                 False if the view is for SERVER_JOB or CLIENT_JOB.

        """
        return not self._is_job_view()


    def is_retry(self):
        """Check whether the view is for a retry.

        @returns: True, if the view is for a retry; False otherwise.

        """
        return self.view['job_keyvals'].get('retry_original_job_id') is not None


    def hit_timeout(self):
        """Check whether the corresponding job has hit its own timeout.

        Note this method should not be called for those test views
        that belongs to a suite job and are determined as irrelevant
        by is_relevant_suite_view. This is because they are associated
        to the suite job, whose job start/finished time make no sense
        to an irrelevant test view.

        @returns: True if the corresponding afe job has hit timeout.
                  False otherwise.
        """
        if (self.is_relevant_suite_view() and
                self.get_testname() != self.SUITE_JOB):
            # Any relevant suite test view except SUITE_JOB
            # did not hit its own timeout because it was not ever run.
            return False
        start = (datetime.strptime(
                self.view['job_started_time'], time_utils.TIME_FMT)
                 if self.view['job_started_time'] else None)
        end = (datetime.strptime(
                self.view['job_finished_time'], time_utils.TIME_FMT)
               if self.view['job_finished_time'] else None)
        if not start or not end:
            # Without both timestamps the run time cannot be computed.
            return False
        else:
            return ((end - start).total_seconds()/60.0
                    > self.afe_job.max_runtime_mins)


    def is_aborted(self):
        """Check if the view was aborted.

        For suite job and child job test views, we check job keyval
        'aborted_by' and test status.

        For relevant suite job test views, we only check test status
        because the suite job keyval won't make sense to individual
        test views.

        @returns: True if the test was aborted, False otherwise.

        """

        if (self.is_relevant_suite_view() and
                self.get_testname() != self.SUITE_JOB):
            return self.view['status'] == 'ABORT'
        else:
            return (bool(self.view['job_keyvals'].get('aborted_by')) and
                    self.view['status'] in ['ABORT', 'RUNNING'])


    def is_in_fail_status(self):
        """Check if the given test's status corresponds to a failure.

        @returns: True if the test's status is FAIL, ERROR or ABORT.
                  False otherwise.

        """
        # All the statuses tests can have when they fail.
        return self.view['status'] in ['FAIL', 'ERROR', 'ABORT']


    def is_provision(self):
        """Check whether this is a provision test."""
        return self.get_testname() == 'provision'


    def get_buildbot_link_reason(self):
        """Generate the buildbot link reason for the test.

        @returns: A string representing the reason: 'status: reason' when
                  the view has a reason, the bare status otherwise.

        """
        return ('%s: %s' % (self.view['status'], self.view['reason'])
                if self.view['reason'] else self.view['status'])


    def get_job_id_owner_str(self):
        """Generate the job_id_owner string for a test.

        @returns: A string which looks like 135036-username
        """
        # self.user is actually the user that is executing this run_suite
        # call, which is not necessarily the same as the job-creating user.
        # The job creating user is available as a keyval; but fall back to
        # self.user in case that key is missing.
        job_user = self.view['job_keyvals'].get('user') or self.user
        return '%s-%s' % (self.view['afe_job_id'], job_user)


    def get_bug_info(self, suite_job_keyvals):
        """Get the bug info from suite_job_keyvals.

        If a bug has been filed for the test, its bug info (bug id and counts)
        will be stored in the suite job's keyvals. This method attempts to
        retrieve bug info of the test from |suite_job_keyvals|. It will return
        None if no bug info is found. No need to check bug info if the view is
        SUITE_JOB.

        @param suite_job_keyvals: The job keyval dictionary of the suite job.
                All the bug info about child jobs are stored in
                suite job's keyvals.

        @returns: None if there is no bug info, or a pair with the
                  id of the bug, and the count of the number of
                  times the bug has been seen.

        """
        if self.get_testname() == self.SUITE_JOB:
            return None

        return tools.get_test_failure_bug_info(
                suite_job_keyvals, self.view['afe_job_id'],
                self._get_raw_testname())


    def should_display_buildbot_link(self):
        """Check whether a buildbot link should show for this view.

        For suite job view, show buildbot link if it fails.
        For normal test view,
            show buildbot link if it is a retry
            show buildbot link if it hits its own timeout.
            show buildbot link if it fails. This doesn't
            include the case where it was aborted but has
            not hit its own timeout (most likely it was aborted because
            suite has timed out).

        @returns: True if we should show the buildbot link.
                  False otherwise.
        """
        is_bad_status = (self.view['status'] != 'GOOD' and
                         self.view['status'] != 'TEST_NA')
        if self.get_testname() == self.SUITE_JOB:
            return is_bad_status
        if self.is_retry():
            return True
        if is_bad_status:
            return not self.is_aborted() or self.hit_timeout()
        # A passing, non-retried test needs no link. Return False
        # explicitly instead of falling off the end and returning None.
        return False


    def get_control_file_attributes(self):
        """Get the attributes from the control file of the test.

        @returns: A list of test attribute or None.
        """
        control_file = self.afe_job.control_file
        attributes = None
        if control_file:
            cd = control_data.parse_control_string(control_file)
            attributes = list(cd.attributes)
        return attributes


    def override_afe_job_id(self, afe_job_id):
        """Overrides the AFE job id for the test.

        @param afe_job_id: The new AFE job id to use.
        """
        self.view['afe_job_id'] = afe_job_id
1198
1199
def log_buildbot_links(log_func, links):
    """Output buildbot links to log.

    For every link, all of its buildbot links are logged first, followed
    by its retry link and its history link whenever those are truthy.

    @param log_func: Logging function to use.
    @param links: Iterable of LogLink instances.
    """
    for link in links:
        for buildbot_link in link.GenerateBuildbotLinks():
            log_func(buildbot_link)
        # The retry and history links are optional; skip the missing ones.
        for make_link in (link.GenerateRetryLink, link.GenerateHistoryLink):
            optional_link = make_link()
            if optional_link:
                log_func(optional_link)
Allen Lidc2c69a2016-09-14 19:05:47 -07001215
1216
class _ReturnCodeComputer(object):
    """This is responsible for returning the _ReturnResult for a suite."""

    def __call__(self, test_views):
        """Compute the exit code based on test results.

        The result of every view is merged with the |= operator into a
        running result that starts out as _RETURN_RESULTS['ok'].

        @param test_views: Iterable of TestView objects.
        @returns: The merged result.
        """
        merged = _RETURN_RESULTS['ok']
        for view in test_views:
            if view.get_testname() == TestView.SUITE_JOB:
                compute = self._get_suite_result
            else:
                compute = self._get_test_result
            merged |= compute(view)
        return merged

    def _get_suite_result(self, test_view):
        """Return the _ReturnResult for the given suite job."""
        # The order of checking each case is important.
        if test_view.is_aborted() and test_view.hit_timeout():
            return _RETURN_RESULTS['suite_timeout']
        if test_view.is_in_fail_status():
            return _RETURN_RESULTS['suite_failed']
        if test_view['status'] == 'WARN':
            return _RETURN_RESULTS['suite_warning']
        return _RETURN_RESULTS['ok']

    def _get_test_result(self, test_view):
        """Return the _ReturnResult for the given test job."""
        # The order of checking each case is important.
        if test_view.is_aborted():
            if test_view.is_relevant_suite_view():
                # The test was aborted before started
                # This guarantees that the suite has timed out.
                return _RETURN_RESULTS['test_aborted_prestart']
            if not test_view.hit_timeout():
                # The test was aborted, but
                # not due to a timeout. This is most likely
                # because the suite has timed out, but may
                # also because it was aborted by the user.
                # Since suite timing out is determined by checking
                # the suite job view, we simply ignore this view here.
                return _RETURN_RESULTS['test_aborted_mystery']

        if test_view.is_in_fail_status():
            # The test job failed
            failure_key = ('provision_failed' if test_view.is_provision()
                           else 'test_failure')
            return _RETURN_RESULTS[failure_key]
        if test_view['status'] == 'WARN':
            return _RETURN_RESULTS['test_warning']
        if test_view.is_retry():
            # The test is a passing retry.
            return _RETURN_RESULTS['test_retry']
        return _RETURN_RESULTS['ok']
1270
1271
class _ProvisionReturnCodeComputer(_ReturnCodeComputer):
    """This is used for returning the _ReturnResult for provision suites.

    A provision suite is reported as ok as soon as enough of its test
    views are successful, whatever the other views look like.
    """

    def __init__(self, num_required):
        """Initialize instance.

        num_required is the number of passing provision jobs needed.
        """
        super(_ProvisionReturnCodeComputer, self).__init__()
        self._num_required = num_required
        self._num_successful = 0

    def __call__(self, test_views):
        """Compute the result, upgrading it to ok on enough successes.

        @param test_views: Iterable of TestView objects.
        @returns: _RETURN_RESULTS['ok'] when at least num_required test
                  views were successful, the result computed by the base
                  class otherwise.
        """
        # Count the successes of this computation only. Without the reset
        # a second call on the same instance would add to the tally of the
        # first one and could upgrade the result by mistake.
        self._num_successful = 0
        result = super(_ProvisionReturnCodeComputer, self).__call__(test_views)
        if self._num_successful >= self._num_required:
            logging.info('Return result upgraded from %r'
                         ' due to enough ok provisions',
                         result)
            return _RETURN_RESULTS['ok']
        else:
            return result

    def _get_test_result(self, test_view):
        """Return the result for the test job and tally the successes.

        A view counts as successful when its result is 'ok' or
        'test_retry'.
        """
        result = (super(_ProvisionReturnCodeComputer, self)
                  ._get_test_result(test_view))
        if result in {_RETURN_RESULTS[s] for s in ('ok', 'test_retry')}:
            self._num_successful += 1
        return result
Allen Li0b675b62017-07-05 13:38:04 -07001300
1301
class ResultCollector(object):
    """Collect test results of a suite or a single test run.

    Once a suite job has finished, use this class to collect test results.
    `run` is the core method that is to be called first. Then the caller
    could retrieve information like return code, return message, is_aborted,
    and timings by accessing the collector's public attributes. And output
    the test results and links by calling the 'output_*' methods.

    Here is an overview of what the `run` method does.

    1) Collect the suite job's results from tko_test_view_2.
    For the suite job, we only pull test views without a 'subdir'.
    A NULL subdir indicates that the test was _not_ executed. This could be
    that no child job was scheduled for this test or the child job got
    aborted before it started running.
    (Note 'SERVER_JOB'/'CLIENT_JOB' are handled specially)

    2) Collect the child jobs' results from tko_test_view_2.
    For child jobs, we pull all the test views associated with them.
    (Note 'SERVER_JOB'/'CLIENT_JOB' are handled specially)

    3) Generate web and buildbot links.
    4) Compute timings of the suite run.
    5) Compute the return code based on test results.

    @var _instance_server: The hostname of the server that is used
                           to service the suite.
    @var _afe: The afe rpc client.
    @var _tko: The tko rpc client.
    @var _build: The build for which the suite is run,
                 e.g. 'lumpy-release/R35-5712.0.0'
    @var _suite_name: The suite name, e.g. 'bvt', 'dummy'.
    @var _suite_job_id: The job id of the suite for which we are going to
                        collect results.
    @var _original_suite_name: The suite name we record timing would be
                               different from _suite_name when running
                               suite_attr_wrapper.
    @var _return_code_function: Called to return what the overall result of
                                the suite is.
    @var _suite_views: A list of TestView objects, representing relevant
                       test views of the suite job.
    @var _child_views: A list of TestView objects, representing test views
                       of the child jobs.
    @var _test_views: A list of TestView objects, representing all test views
                      from _suite_views and _child_views.
    @var _retry_counts: A dict mapping test_idx to the retry count of that
                        test; only counts greater than 0 are stored.
    @var _missing_results: A dict mapping test names to lists of ids of
                           child jobs that have no test views.
    @var _web_links: A list of web links pointing to the results of jobs.
    @var buildbot_links: A list of buildbot links for non-passing tests.
    @var _num_child_jobs: The number of child jobs that were examined.
    @var _user: The user passed to TestView; defaults to the current user.
    @var _solo_test_run: True if this is a single test run.
    @var return_result: The _ReturnResult of the suite run.
    @var is_aborted: Whether the suite was aborted or not.
                     True, False or None (aborting status is unknown yet)
    @var timings: A Timing object that records the suite's timings.

    """


    def __init__(self, instance_server, afe, tko, build,
                 suite_name, suite_job_id, return_code_function,
                 original_suite_name=None,
                 user=None, solo_test_run=False):
        """Initialize the collector; no RPC is issued until `run`.

        @param instance_server: Hostname of the server servicing the suite.
        @param afe: The afe rpc client.
        @param tko: The tko rpc client.
        @param build: The build for which the suite is run.
        @param suite_name: The suite name.
        @param suite_job_id: The job id of the suite (or of the single test
                             job when solo_test_run is True).
        @param return_code_function: Callable taking the list of test views
                                     and returning the overall result.
        @param original_suite_name: Suite name used for timing records;
                                    defaults to suite_name.
        @param user: User name; defaults to getpass.getuser().
        @param solo_test_run: True if this is a single test run.
        """
        self._instance_server = instance_server
        self._afe = afe
        self._tko = tko
        self._build = build
        self._suite_name = suite_name
        self._suite_job_id = suite_job_id
        self._original_suite_name = original_suite_name or suite_name
        self._return_code_function = return_code_function
        self._suite_views = []
        self._child_views = []
        self._test_views = []
        self._retry_counts = {}
        self._missing_results = {}
        self._web_links = []
        self.buildbot_links = []
        self._num_child_jobs = 0
        self.return_result = None
        self.is_aborted = None
        self.timings = None
        self._user = user or getpass.getuser()
        self._solo_test_run = solo_test_run


    def _fetch_relevant_test_views_of_suite(self):
        """Fetch relevant test views of the suite job.

        For the suite job, there will be a test view for SERVER_JOB, and views
        for results of its child jobs. For example, assume we've created
        a suite job (afe_job_id: 40) that runs dummy_Pass, dummy_Fail,
        dummy_Pass.bluetooth. Assume dummy_Pass was aborted before running while
        dummy_Pass.bluetooth got TEST_NA as no duts have bluetooth.
        So the suite job's test views would look like
        _____________________________________________________________________
        test_idx| job_idx|test_name           |subdir      |afe_job_id|status
        10      | 1000   |SERVER_JOB          |----        |40        |GOOD
        11      | 1000   |dummy_Pass          |NULL        |40        |ABORT
        12      | 1000   |dummy_Fail.Fail     |41-owner/...|40        |FAIL
        13      | 1000   |dummy_Fail.Error    |42-owner/...|40        |ERROR
        14      | 1000   |dummy_Pass.bluetooth|NULL        |40        |TEST_NA

        For a suite job, we only care about
        a) The test view for the suite job's SERVER_JOB
        b) The test views for real tests without a subdir. A NULL subdir
           indicates that a test didn't get executed.
        So, for the above example, we only keep test views whose test_idxs
        are 10, 11, 14.

        @returns: A list of TestView objects, representing relevant
                  test views of the suite job.

        """
        suite_job = self._afe.get_jobs(id=self._suite_job_id)[0]
        views = self._tko.run(call='get_detailed_test_views',
                              afe_job_id=self._suite_job_id)
        relevant_views = []
        for v in views:
            v = TestView(v, suite_job, self._suite_name, self._build,
                         self._user, solo_test_run=self._solo_test_run)
            if v.is_relevant_suite_view():
                # If the test doesn't have results in TKO and is being
                # displayed in the suite view instead of the child view,
                # then afe_job_id is incorrect and from the suite.
                # Override it based on the AFE job id which was missing
                # results.
                # TODO: This is likely inaccurate if a test has multiple
                # tries which all fail TKO parse stage.
                if v['test_name'] in self._missing_results:
                    v.override_afe_job_id(
                            self._missing_results[v['test_name']][0])
                relevant_views.append(v)
        return relevant_views


    def _compute_retry_count(self, view):
        """Return how many times the test has been retried.

        Follows the 'retry_original_job_id' job keyval from job to job,
        issuing one tko query per hop, until a job without that keyval (or
        without any test view) is reached.

        @param view: A TestView instance.
        @returns: An int value indicating the retry count.

        """
        old_job = view['job_keyvals'].get('retry_original_job_id')
        count = 0
        # Remember the jobs already visited so that a cyclic keyval chain
        # cannot keep this loop (and its RPCs) running forever.
        visited = set()
        while old_job and old_job not in visited:
            visited.add(old_job)
            count += 1
            views = self._tko.run(
                    call='get_detailed_test_views', afe_job_id=old_job)
            old_job = (views[0]['job_keyvals'].get('retry_original_job_id')
                       if views else None)
        return count


    def _fetch_test_views_of_child_jobs(self, jobs=None):
        """Fetch test views of child jobs.

        @param jobs: The jobs whose test views should be fetched. When None,
                     the child jobs of the suite job are looked up in the
                     AFE. An explicitly passed empty list means "no jobs"
                     and does not trigger that lookup.

        @returns: A tuple (child_views, retry_counts, missing_results)
                  child_views is list of TestView objects, representing
                  all valid views.
                  retry_counts is a dictionary that maps test_idx to retry
                  counts. It only stores retry counts that are greater than 0.
                  missing_results is a dictionary that maps test names to
                  lists of job ids.

        """
        child_views = []
        retry_counts = {}
        missing_results = {}
        if jobs is not None:
            child_jobs = jobs
        else:
            child_jobs = self._afe.get_jobs(parent_job_id=self._suite_job_id)
        if child_jobs:
            self._num_child_jobs = len(child_jobs)
        for job in child_jobs:
            views = [TestView(v, job, self._suite_name, self._build,
                              self._user)
                     for v in self._tko.run(
                             call='get_detailed_test_views',
                             afe_job_id=job.id, invalid=0)]
            if not views:
                # The job produced no views at all; remember it so that the
                # suite-level view can later be pointed at this job id.
                missing_results.setdefault(job.name, []).append(job.id)
            contains_test_failure = any(
                    v.is_test() and v['status'] != 'GOOD' for v in views)
            for v in views:
                if (v.is_test() or
                        v['status'] != 'GOOD' and not contains_test_failure):
                    # For normal test view, just keep it.
                    # For SERVER_JOB or CLIENT_JOB, only keep it
                    # if it fails and no other test failure.
                    child_views.append(v)
                    retry_count = self._compute_retry_count(v)
                    if retry_count > 0:
                        retry_counts[v['test_idx']] = retry_count
        return child_views, retry_counts, missing_results


    def _generate_web_and_buildbot_links(self):
        """Generate web links and buildbot links."""
        # TODO(fdeng): If a job was aborted before it reaches Running
        #              state, we read the test view from the suite job
        #              and thus this method generates a link pointing to the
        #              suite job's page for the aborted job. Need a fix.
        self._web_links = []
        self.buildbot_links = []

        # Bug info are stored in the suite job's keyvals. A solo test run,
        # or a run without any suite view, has no such keyvals.
        if self._solo_test_run or not self._suite_views:
            suite_job_keyvals = {}
        else:
            suite_job_keyvals = self._suite_views[0]['job_keyvals']

        for v in self._test_views:
            retry_count = self._retry_counts.get(v['test_idx'], 0)
            bug_info = v.get_bug_info(suite_job_keyvals)
            job_id_owner = v.get_job_id_owner_str()
            link = LogLink(
                    anchor=v.get_testname(),
                    server=self._instance_server,
                    job_string=job_id_owner,
                    bug_info=bug_info, retry_count=retry_count,
                    testname=v.get_testname(),
                    sponge_url=suite_job_keyvals.get('sponge_url'))
            self._web_links.append(link)

            if v.should_display_buildbot_link():
                link.reason = v.get_buildbot_link_reason()
                self.buildbot_links.append(link)


    def _record_timings(self):
        """Record suite timings."""
        self.timings = Timings(self._suite_job_id)
        for v in self._test_views:
            self.timings.RecordTiming(v)


    def _compute_return_code(self):
        """Compute the exit code based on test results."""
        self.return_result = self._return_code_function(self._test_views)


    def _make_test_results(self):
        """Make TestResults for collected tests.

        @returns: List of TestResult instances.
        """
        return [
                TestResult(
                        test_view=test_view,
                        retry_count=self._retry_counts.get(
                                test_view['test_idx'], 0))
                for test_view in self._test_views]


    def output_results(self):
        """Output test results, timings and web links."""
        # Output test results
        test_results = self._make_test_results()
        if test_results:
            max_name_length = max(len(t.name) for t in test_results)
        else:
            max_name_length = 0
        for test_result in test_results:
            test_result.log_using(logging.info, max_name_length + 3)
        # Output suite timings
        logging.info(self.timings)
        # Output links to test logs
        logging.info('\nLinks to test logs:')
        for link in self._web_links:
            logging.info(link.text_link)
        logging.info('\n')


    def get_results_dict(self):
        """Write test results, timings and web links into a dict.

        @returns: A dict of results in the format like:
                  {
                  'tests': {
                        'test_1': {'status': 'GOOD', 'attributes': [1,2], ...}
                        'test_2': {'status': 'FAIL', 'attributes': [1],...}
                  }
                  'suite_timings': {
                        'download_start': '1998-07-17 00:00:00',
                        'payload_download_end': '1998-07-17 00:00:05',
                        ...
                  }
                  'suite_job_id': 40,
                  }
                  'suite_timings' is only present when timings were
                  recorded.
        """
        output_dict = {}
        tests_dict = output_dict.setdefault('tests', {})
        for v in self._test_views:
            test_name = v.get_testname()
            test_info = tests_dict.setdefault(test_name, {})
            test_info.update({
                # Copy the attributes: 'subsystem:default' may be appended
                # below and that must not modify the view's own list.
                'status': v['status'],
                'attributes': list(v.get_control_file_attributes() or []),
                'reason': v['reason'],
                'retry_count': self._retry_counts.get(v['test_idx'], 0),
                'job_id': v['afe_job_id'],
                })
            # For aborted test, the control file will not be parsed and thus
            # fail to get the attributes info. Therefore, the subsystems the
            # abort test testing will be missing. For this case, we will assume
            # the aborted test will test all subsystems, set subsystem:default.
            if (test_info['status'] == 'ABORT' and
                not any('subsystem:' in a for a in test_info['attributes'])):
                test_info['attributes'].append('subsystem:default')

        # Write the links to test logs into the |tests_dict| of |output_dict|.
        # For test whose status is not 'GOOD', the link is also buildbot_link.
        for link in self._web_links:
            test_name = link.anchor.strip()
            test_info = tests_dict.get(test_name)
            if test_info:
                test_info['link_to_logs'] = link.url
                test_info['sponge_url'] = link.sponge_url
                # Write the retry dashboard link into the dict.
                if link in self.buildbot_links and link.testname:
                    test_info['retry_dashboard_link'] \
                        = reporting_utils.link_retry_url(link.testname)
                    # Always write the wmatrix link for compatibility.
                    test_info['wmatrix_link'] \
                        = reporting_utils.link_wmatrix_retry_url(link.testname)
                # Write the bug url into the dict.
                if link.bug_id:
                    test_info['bug_url'] = link.bug_url

        # Write the suite timings into |output_dict|
        timings = self.timings
        if timings is not None:
            time_dict = output_dict.setdefault('suite_timings', {})
            time_dict.update({
                'download_start' : str(timings.download_start_time),
                'payload_download_end' : str(timings.payload_end_time),
                'suite_start' : str(timings.suite_start_time),
                'artifact_download_end' : str(timings.artifact_end_time),
                'tests_start' : str(timings.tests_start_time),
                'tests_end' : str(timings.tests_end_time),
                })

        output_dict['suite_job_id'] = self._suite_job_id

        return output_dict


    def run(self):
        """Collect test results.

        This method goes through the following steps:
            Fetch relevant test views of the suite job.
            Fetch test views of child jobs
            Check whether the suite was aborted.
            Generate links.
            Calculate suite timings.
            Compute return code based on the test result.

        When no test view is found at all, return_result is set to the
        'test_views_missing' result and the remaining steps are skipped.

        """
        if self._solo_test_run:
            # A solo run has no suite job; the job itself is treated like a
            # child job and its views are the only test views.
            self._test_views, self._retry_counts, self._missing_results = (
                    self._fetch_test_views_of_child_jobs(
                            jobs=self._afe.get_jobs(id=self._suite_job_id)))
        else:
            # Child views must be fetched first: fetching the suite views
            # reads self._missing_results.
            self._child_views, self._retry_counts, self._missing_results = (
                    self._fetch_test_views_of_child_jobs())
            self._suite_views = self._fetch_relevant_test_views_of_suite()
            self._test_views = self._suite_views + self._child_views
        # For hostless job in Starting status, there is no test view associated.
        # This can happen when a suite job in Starting status is aborted. When
        # the scheduler hits some limit, e.g., max_hostless_jobs_per_drone,
        # max_jobs_started_per_cycle, a suite job can stays in Starting status.
        if not self._test_views:
            self.return_result = _RETURN_RESULTS['test_views_missing']
            return
        self.is_aborted = any(view['job_keyvals'].get('aborted_by')
                              for view in self._suite_views)
        self._generate_web_and_buildbot_links()
        self._record_timings()
        self._compute_return_code()


    def gather_timing_stats(self):
        """Collect timing related statistics.

        @returns: The suite runtime in seconds, measured from the suite
                  start time to the end time of the tests, or -1 when
                  either time (or the timings object itself) is missing.
        """
        # Some failure modes can leave times unassigned, report sentinel value
        # in that case.
        runtime_in_secs = -1
        timings = self.timings
        if (timings is not None and
            timings.tests_end_time is not None and
            timings.suite_start_time is not None):
            runtime_in_secs = (timings.tests_end_time -
                               timings.suite_start_time).total_seconds()
        return runtime_in_secs
Prathmesh Prabhua3713a02015-03-11 13:50:55 -07001692
MK Ryu977a9752014-10-21 11:58:09 -07001693
Richard Barnetteae9eaa42018-09-12 10:28:07 -07001694def _make_child_dependencies(options):
Prathmesh Prabhu9b8e7ad2017-10-30 14:26:13 -07001695 """Creates a list of extra dependencies for child jobs.
1696
1697 @param options: Parsed arguments to run_suite.
1698
1699 @returns: A list of label strings if any dependencies should be added. None
1700 otherwise.
1701 """
1702 if not options.model:
1703 return ()
Richard Barnetteae9eaa42018-09-12 10:28:07 -07001704 return ('model:%s' % options.model,)
Prathmesh Prabhu9b8e7ad2017-10-30 14:26:13 -07001705
1706
@retry.retry(error.StageControlFileFailure, timeout_min=10)
def create_suite(afe, options):
    """Submit the 'create_suite_job' rpc, retrying on staging failures.

    The call is retried by the decorator when it raises
    error.StageControlFileFailure.

    @param afe: The afe object to insert the new suite job into.
    @param options: The options to use in creating the suite.

    @return: The afe_job_id of the new suite job.
    """
    logging.info('%s Submitted create_suite_job rpc',
                 diagnosis_utils.JobTimer.format_time(datetime.now()))

    delay_minutes = options.delay_minutes
    wait_for_suite = not options.no_wait
    rpc_kwargs = {
        'name': options.name,
        'board': options.board,
        'builds': suite_common.make_builds_from_options(options),
        'test_source_build': options.test_source_build,
        'check_hosts': wait_for_suite,
        'pool': options.pool,
        'file_bugs': options.file_bugs,
        'priority': options.priority,
        'suite_args': options.suite_args,
        'wait_for_results': wait_for_suite,
        # Both limits are extended by the requested start delay.
        'timeout_mins': options.timeout_mins + delay_minutes,
        'max_runtime_mins': options.max_runtime_mins + delay_minutes,
        'job_retry': options.retry,
        'max_retries': options.max_retries,
        'suite_min_duts': options.suite_min_duts,
        'offload_failures_only': options.offload_failures_only,
        'run_prod_code': options.run_prod_code,
        'delay_minutes': delay_minutes,
        'job_keyvals': options.job_keyvals,
        'test_args': options.test_args,
        'child_dependencies': options.child_dependencies,
    }
    return afe.run('create_suite_job', **rpc_kwargs)
Prashanth B6285f6a2014-05-08 18:01:27 -07001743
1744
def _run_suite(options):
    """
    run_suite script without exception handling.

    Sets up logging, creates the suite job (or attaches to an existing one
    when options.mock_job_id is given) and then either returns right away
    or hands the job over to _handle_job_nowait / _handle_job_wait.

    @param options: The parsed options.

    @returns: A run_suite_common.SuiteResult for the run.

    @raises utils.TestLabException: If options.mock_job_id names a job
                                    that cannot be retrieved.

    """
    # If the new style suite control file is requested, convert the args
    if options.use_suite_attr:
        options = change_options_for_suite_attr(options)

    log_name = _get_log_name(options)
    utils.setup_logging(logfile=log_name)

    # The lab status check is skipped on request and whenever an explicit
    # server was given through options.web.
    if not options.bypass_labstatus and not options.web:
        utils.check_lab_status(options.build)

    afe = _create_afe(options)
    instance_server = afe.server

    rpc_helper = diagnosis_utils.RPCHelper(afe)
    is_real_time = True
    if options.mock_job_id:
        job_id = int(options.mock_job_id)
        # A job that is already finished is not handled in real time.
        existing_job = afe.get_jobs(id=job_id, finished=True)
        if existing_job:
            is_real_time = False
        else:
            existing_job = afe.get_jobs(id=job_id)
        if existing_job:
            job_created_on = time_utils.date_string_to_epoch_time(
                    existing_job[0].created_on)
        else:
            raise utils.TestLabException('Failed to retrieve job: %d' % job_id)
    else:
        try:
            rpc_helper.check_dut_availability(options.dependencies,
                                              options.minimum_duts,
                                              options.skip_duts_check)
            job_id = create_suite(afe, options)
            job_created_on = time.time()
        except (error.CrosDynamicSuiteException,
                error.RPCException, proxy.JSONRPCException) as e:
            logging.exception('Error Message: %s', e)
            return run_suite_common.SuiteResult(
                    run_suite_common.RETURN_CODES.INFRA_FAILURE,
                    {'return_message': str(e)})
        except AttributeError as e:
            # An AttributeError raised while creating the suite is reported
            # as invalid options.
            logging.exception('Error Message: %s', e)
            return run_suite_common.SuiteResult(
                    run_suite_common.RETURN_CODES.INVALID_OPTIONS)

    job_timer = diagnosis_utils.JobTimer(
            job_created_on, float(options.timeout_mins))
    job_url = reporting_utils.link_job(job_id,
                                       instance_server=instance_server)
    _log_create_task(job_timer, job_url, job_id)

    if options.create_and_return:
        msg = '--create_and_return was specified, terminating now.'
        logging.info(msg)
        return run_suite_common.SuiteResult(
                run_suite_common.RETURN_CODES.OK,
                {'return_message': msg})

    if options.no_wait:
        return _handle_job_nowait(job_id, options, instance_server)
    else:
        return _handle_job_wait(afe, job_id, options, job_timer, is_real_time)
Allen Li340414e2016-08-16 14:19:08 -07001817
1818
xixuan99eba0b2017-07-12 15:10:01 -07001819def _get_log_name(options):
1820 """Return local log file's name.
1821
1822 @param options: Parsed options.
1823
1824 @return log_name, a string file name.
1825 """
1826 if options.require_logfile:
1827 # options.build is verified to exist in verify_options.
1828 # convert build name from containing / to containing only _.
1829 log_name = 'run_suite-%s.log' % options.build.replace('/', '_')
1830 log_dir = os.path.join(common.autotest_dir, 'logs')
1831 if os.path.exists(log_dir):
1832 log_name = os.path.join(log_dir, log_name)
1833
1834 return log_name
1835 else:
1836 return None
1837
1838
def _create_afe(options):
    """Create the AFE client described by the options.

    The server is options.web when given, otherwise the instance that
    instance_for_pool returns for options.pool.

    @param options Parsed options.

    @return afe, an AFE instance.
    """
    server = options.web or instance_for_pool(options.pool)
    retrying_afe = frontend_wrappers.RetryingAFE(
            server=server,
            timeout_min=options.afe_timeout_mins,
            delay_sec=options.delay_sec)
    logging.info('Autotest instance created: %s', server)
    return retrying_afe
1853
1854
Allen Li340414e2016-08-16 14:19:08 -07001855def _handle_job_wait(afe, job_id, options, job_timer, is_real_time):
1856 """Handle suite job synchronously.
1857
1858 @param afe AFE instance.
1859 @param job_id Suite job id.
1860 @param options Parsed options.
1861 @param job_timer JobTimer for suite job.
1862 @param is_real_time Whether or not to handle job timeout.
1863
1864 @return SuiteResult of suite job.
1865 """
Allen Li340414e2016-08-16 14:19:08 -07001866 rpc_helper = diagnosis_utils.RPCHelper(afe)
1867 instance_server = afe.server
1868 while not afe.get_jobs(id=job_id, finished=True):
Allen Li425d91f2017-07-10 15:14:20 -07001869 _poke_buildbot_with_output(afe, job_id, job_timer)
Allen Li340414e2016-08-16 14:19:08 -07001870 if job_timer.debug_output_timer.poll():
1871 logging.info('The suite job has another %s till timeout.',
Allen Li425d91f2017-07-10 15:14:20 -07001872 job_timer.timeout_hours - job_timer.elapsed_time())
Allen Li340414e2016-08-16 14:19:08 -07001873 time.sleep(10)
xixuana96bd212017-01-13 12:51:22 +08001874 logging.info('%s Suite job is finished.',
1875 diagnosis_utils.JobTimer.format_time(datetime.now()))
Allen Li340414e2016-08-16 14:19:08 -07001876 # For most cases, ResultCollector should be able to determine whether
1877 # a suite has timed out by checking information in the test view.
1878 # However, occationally tko parser may fail on parsing the
1879 # job_finished time from the job's keyval file. So we add another
1880 # layer of timeout check in run_suite. We do the check right after
1881 # the suite finishes to make it as accurate as possible.
1882 # There is a minor race condition here where we might have aborted
1883 # for some reason other than a timeout, and the job_timer thinks
1884 # it's a timeout because of the jitter in waiting for results.
1885 # The consequence would be that run_suite exits with code
1886 # SUITE_TIMEOUT while it should have returned INFRA_FAILURE
1887 # instead, which should happen very rarely.
1888 # Note the timeout will have no sense when using -m option.
1889 is_suite_timeout = job_timer.is_suite_timeout()
1890
1891 # Extract the original suite name to record timing.
1892 original_suite_name = get_original_suite_name(options.name,
Allen Li425d91f2017-07-10 15:14:20 -07001893 options.suite_args)
Allen Li340414e2016-08-16 14:19:08 -07001894 # Start collecting test results.
Aseda Aboagyed72df752017-05-22 14:30:11 -07001895 logging.info('%s Start collecting test results and dump them to json.',
xixuana96bd212017-01-13 12:51:22 +08001896 diagnosis_utils.JobTimer.format_time(datetime.now()))
Alex Millerc7a59522013-10-30 15:18:57 -07001897 TKO = frontend_wrappers.RetryingTKO(server=instance_server,
Simran Basi25effe32013-11-26 13:02:11 -08001898 timeout_min=options.afe_timeout_mins,
Chris Masone8ac66712012-02-15 14:21:02 -08001899 delay_sec=options.delay_sec)
Allen Li637683b2017-11-06 17:36:27 -08001900 # TODO(crbug.com/672348): It needs to be possible for provision
1901 # suite to pass if only a few tests fail. Otherwise, a single
1902 # failing test will be reported as failure even if the suite reports
1903 # success.
Allen Li977760b2017-11-06 18:11:37 -08001904 if options.name == _PROVISION_SUITE:
Allen Li637683b2017-11-06 17:36:27 -08001905 # TODO(crbug.com/672348): Creating the suite job requires that
1906 # suite_args contains num_required.
Allen Li422f24b2017-07-12 15:15:43 -07001907 return_code_function = _ProvisionReturnCodeComputer(
1908 num_required=options.suite_args['num_required'])
1909 else:
1910 return_code_function = _ReturnCodeComputer()
Allen Li340414e2016-08-16 14:19:08 -07001911 collector = ResultCollector(instance_server=instance_server,
1912 afe=afe, tko=TKO, build=options.build,
Allen Li340414e2016-08-16 14:19:08 -07001913 suite_name=options.name,
1914 suite_job_id=job_id,
Allen Li422f24b2017-07-12 15:15:43 -07001915 return_code_function=return_code_function,
Allen Li340414e2016-08-16 14:19:08 -07001916 original_suite_name=original_suite_name)
1917 collector.run()
1918 # Dump test outputs into json.
1919 output_dict = collector.get_results_dict()
1920 output_dict['autotest_instance'] = instance_server
1921 if not options.json_dump:
1922 collector.output_results()
Allen Licc205492017-07-10 17:26:04 -07001923 result = collector.return_result
Allen Li340414e2016-08-16 14:19:08 -07001924 if is_real_time:
1925 # Do not record stats if the suite was aborted (either by a user
1926 # or through the golo rpc).
1927 # Also do not record stats if is_aborted is None, indicating
1928 # aborting status is unknown yet.
1929 if collector.is_aborted == False:
xixuana96bd212017-01-13 12:51:22 +08001930 logging.info('%s Gathering timing stats for the suite job.',
1931 diagnosis_utils.JobTimer.format_time(datetime.now()))
Allen Li340414e2016-08-16 14:19:08 -07001932 collector.gather_timing_stats()
J. Richard Barnette712eb402013-08-13 18:03:00 -07001933
Allen Li340414e2016-08-16 14:19:08 -07001934 if collector.is_aborted == True and is_suite_timeout:
1935 # There are two possible cases when a suite times out.
1936 # 1. the suite job was aborted due to timing out
1937 # 2. the suite job succeeded, but some child jobs
1938 # were already aborted before the suite job exited.
1939 # The case 2 was handled by ResultCollector,
1940 # here we handle case 1.
Allen Licc205492017-07-10 17:26:04 -07001941 result |= _RETURN_RESULTS['suite_timeout']
xixuana96bd212017-01-13 12:51:22 +08001942 logging.info('\n %s Attempting to display pool info: %s',
1943 diagnosis_utils.JobTimer.format_time(datetime.now()),
1944 options.pool)
Allen Li340414e2016-08-16 14:19:08 -07001945 try:
1946 # Add some jitter to make up for any latency in
1947 # aborting the suite or checking for results.
Allen Li0b675b62017-07-05 13:38:04 -07001948 cutoff = job_timer.timeout_hours + timedelta(hours=0.3)
Richard Barnetteae9eaa42018-09-12 10:28:07 -07001949 rpc_helper.diagnose_pool(options.dependencies, cutoff)
Allen Lid4aa2fb2016-12-08 14:03:54 -08001950 except proxy.JSONRPCException:
Allen Li340414e2016-08-16 14:19:08 -07001951 logging.warning('Unable to display pool info.')
Aviv Keshet6b1122d2016-06-20 13:29:52 -07001952
Allen Li340414e2016-08-16 14:19:08 -07001953 # And output return message.
Allen Licc205492017-07-10 17:26:04 -07001954 if result.message:
1955 logging.info('Reason: %s', result.message)
Fang Deng5a43be62014-05-07 17:17:04 -07001956
xixuana96bd212017-01-13 12:51:22 +08001957 logging.info('\n %s Output below this line is for buildbot consumption:',
1958 diagnosis_utils.JobTimer.format_time(datetime.now()))
Allen Li28be0642017-07-10 15:16:26 -07001959 log_buildbot_links(logging.info, collector.buildbot_links)
Allen Licc205492017-07-10 17:26:04 -07001960 return result.suite_result(output_dict)
Prashanth B923ca262014-03-14 12:36:29 -07001961
Allen Li340414e2016-08-16 14:19:08 -07001962
def _handle_job_nowait(job_id, options, instance_server):
    """Report a freshly created suite job without waiting for it.

    Logs the job id and every link produced by
    LogLink.GenerateBuildbotLinks(), then returns immediately.

    @param job_id Suite job id.
    @param options Parsed options.
    @param instance_server Autotest instance hostname.

    @return SuiteResult of suite job (always RETURN_CODES.OK).
    """
    no_wait_message = '--no_wait specified; Exiting.'

    logging.info('Created suite job: %r', job_id)
    job_tag = '%s-%s' % (job_id, getpass.getuser())
    suite_link = LogLink(options.name, instance_server, job_tag)
    for buildbot_link in suite_link.GenerateBuildbotLinks():
        logging.info(buildbot_link)

    logging.info(no_wait_message)
    return run_suite_common.SuiteResult(
            run_suite_common.RETURN_CODES.OK,
            {'return_message': no_wait_message})
Chris Masone24b80f12012-02-14 14:18:01 -08001981
Fang Dengdd20e452014-04-07 15:39:47 -07001982
def _should_run(options):
    """Check whether the suite should be run based on lab/job status checking.

    The suite is skipped when the lab status check raises
    TestLabException, or when AFE already lists a suite job for the same
    build and suite name (and model tag, if a model was requested) that
    was created within the last _SEARCH_JOB_MAX_DAYS days.

    @param options Parsed options.

    @return False if the suite should be skipped, True otherwise.
    """
    try:
        site_utils.check_lab_status(options.test_source_build)
    except site_utils.TestLabException as ex:
        logging.exception('Lab is closed or build is blocked. Skipping '
                          'suite %s, board %s, build %s: %s',
                          options.name, options.board,
                          options.test_source_build, str(ex))
        return False

    # Only jobs created after this point in time count as duplicates.
    earliest_created = str(
            datetime.now() - timedelta(days=_SEARCH_JOB_MAX_DAYS))
    afe = _create_afe(options)
    candidates = afe.get_jobs(
            name__istartswith=options.test_source_build,
            name__iendswith='control.' + options.name,
            created_on__gte=earliest_created,
            min_rpc_timeout=_MIN_RPC_TIMEOUT)

    if options.model:
        # Narrow the matches down to jobs whose control file names the model.
        model_tag = 'model:%s' % options.model
        duplicates = [job for job in candidates
                      if model_tag in job.control_file]
    else:
        duplicates = candidates

    if not duplicates:
        return True

    logging.info('Found duplicate suite %s scheduled in past.', duplicates)
    return False
2017
Shuqian Zhao2fecacd2015-08-05 22:56:30 -07002018
def _poke_buildbot_with_output(afe, job_id, job_timer):
    """Poke buildbot so it doesn't timeout from silence.

    Runs a job diagnosis once job_timer.first_past_halftime() reports
    true; otherwise does nothing beyond building the RPC helper.

    @param afe AFE instance.
    @param job_id Suite job id.
    @param job_timer JobTimer for suite job.
    """
    rpc_helper = diagnosis_utils.RPCHelper(afe)
    if not job_timer.first_past_halftime():
        return

    # The diagnosis call logs output, which keeps buildbot's 9000 second
    # silent timeout from kicking in. Let there be no doubt, this is a
    # hack: the timeout comes from upstream buildbot and this is the
    # easiest work around.
    rpc_helper.diagnose_job(job_id, afe.server)
2033
2034
2035
def _run_task(options):
    """Run the suite and translate known failures into results.

    Argument parsing, logging setup and output formatting are handled by
    the callers; this function only wraps _run_suite().

    @param options Parsed options.

    @return A SuiteResult instance: whatever _run_suite() returns, or a
            BOARD_NOT_AVAILABLE / INFRA_FAILURE result when one of the
            handled exceptions is raised.

    TODO(ayatane): The try/except should be moved into _run_suite().
    Good luck trying to figure out which function calls are supposed to
    raise which of the exceptions.
    """
    try:
        return _run_suite(options)
    except diagnosis_utils.DUTsNotAvailableError as e:
        skip_message = 'Skipping testing: %s' % e.message
        skipped = run_suite_common.SuiteResult(
                run_suite_common.RETURN_CODES.BOARD_NOT_AVAILABLE,
                {'return_message': skip_message})
        logging.info(skipped.output_dict['return_message'])
        return skipped
    except utils.TestLabException as e:
        lab_message = 'TestLabException: %s' % e
        failed = run_suite_common.SuiteResult(
                run_suite_common.RETURN_CODES.INFRA_FAILURE,
                {'return_message': lab_message})
        # Logged inside the handler so the traceback is attached.
        logging.exception(failed.output_dict['return_message'])
        return failed
2062
2063
class _ExceptionHandler(object):
    """Global exception handler replacement.

    Instances are callables taking the (type, value, traceback) triple
    that the interpreter passes to an exception hook. Calling one reports
    the unhandled exception and terminates the process with
    RETURN_CODES.INFRA_FAILURE.
    """

    def __init__(self, dump_json):
        """Initialize instance.

        @param dump_json: Whether to print a JSON dump of the result dict to
                          stdout.
        """
        self._should_dump_json = dump_json

    def __call__(self, exc_type, value, traceback):
        """Report an unhandled exception and exit with INFRA_FAILURE.

        @param exc_type: Class of the unhandled exception.
        @param value: The unhandled exception instance.
        @param traceback: Traceback object of the unhandled exception.
        """
        # Keep the interpreter's default diagnostics. Without this the
        # traceback of the failure would be discarded, leaving nothing to
        # debug from. The default hook writes to stderr, so the JSON dump
        # below is not mixed with it.
        sys.__excepthook__(exc_type, value, traceback)
        if self._should_dump_json:
            run_suite_common.dump_json(
                    {'return_message': ('Unhandled run_suite exception: %s'
                                        % value)})
        sys.exit(run_suite_common.RETURN_CODES.INFRA_FAILURE)
Allen Li5e9c35f2017-07-05 14:24:18 -07002081
2082
def _log_create_task(job_timer, job_url, job_id):
    """Logging for task creation.

    @param job_timer JobTimer of the created job; supplies the formatted
                     creation time.
    @param job_url URL of the created job.
    @param job_id Id of the created job.
    """
    created_at = job_timer.format_time(job_timer.job_created_time)
    logging.info('%s Created suite job: %s', created_at, job_url)

    suite_link = annotations.StepLink(text='Link to suite', url=job_url)
    logging.info(suite_link)

    # For task id parsing of chromite HWTestStage.
    logging.info('Created task id: %s', job_id)
2091
2092
def _if_run_in_skylab(options):
    """Detect whether to run suite in skylab.

    @param options Parsed options.

    Returns:
        A tuple of (bool, string, string) to indicate
        (if_use_skylab, override_pool, override_qs_account)
    """
    # An autotest job id is a number of at least 9 digits, e.g. 296843118.
    # A skylab task id is of 16 chars, e.g. 43cabbb4e118ea10.
    mock_id_length = len(str(options.mock_job_id))
    if mock_id_length >= 16:
        # No override info is needed if mock_job_id is specified.
        return True, '', ''

    if not _ENABLE_RUN_SUITE_TRAMPOLINE:
        logging.info('trampoline to skylab is not enabled.')
        return False, '', ''

    task_info = 'suite:%s, board:%s, model:%s, pool:%s' % (
            options.name, options.board, options.model, options.pool)
    # Positional arguments shared by both skylab lookups below.
    lookup_args = (options.board, options.model, options.name, options.pool)

    gs_context = gs.GSContext()
    with osutils.TempDir(prefix='trampoline_') as tempdir:
        # The config is fetched into a scratch directory and parsed while
        # the file still exists.
        local_config = os.path.join(tempdir, _MIGRATION_CONFIG_FILE)
        gs_context.Copy(_TRAMPOLINE_CONFIG, local_config)
        reader = config_reader.ConfigReader(local_config)
        migration_config = config_reader.MigrationConfig(reader)

    logging.info('Checking whether to run in skylab: Task(%s)', task_info)
    if not skylab.should_run_in_skylab(migration_config, *lookup_args):
        logging.info('Task (%s) Should run in autotest', task_info)
        return False, '', ''

    logging.info('Task (%s) Should run in skylab', task_info)
    override_pool, override_qs_account = skylab.get_override_info(
            migration_config, *lookup_args)
    return True, override_pool, override_qs_account
Xixuan Wuc4d33662019-03-18 14:07:15 -07002136
2137
def _get_skylab_suite_result(child_tasks):
    """Parse skylab task result to get final result for the suite.

    The first child task whose final state is not ignored and is known
    to the final suite states decides the result.

    @param child_tasks: A list of json dict of task result object, whose format
                        is: {
                            'name': ...,
                            'state': ...,
                            'failure': ...
                        }.

    @return The return code mapped to the first such state, or
            RETURN_CODES.OK when no child task yields one.
    """
    suite_states = run_suite_common.get_final_skylab_suite_states()
    for child in child_tasks:
        logging.info('Parsing test %r', child)
        task_state = run_suite_common.get_final_skylab_task_state(child)

        if task_state in run_suite_common.IGNORED_TEST_STATE:
            continue
        if task_state not in suite_states:
            continue
        # The return code is the second element of the mapped entry.
        return suite_states[task_state][1]

    return run_suite_common.RETURN_CODES.OK
2158
2159
def _log_skylab_for_buildbot(stdout):
    """Output skylab logs to buildbot.

    Logs a timestamped marker line, then the given output.

    @param stdout: A string.
    """
    timestamp = diagnosis_utils.JobTimer.format_time(datetime.now())
    logging.info('\n %s Output below this line is for buildbot consumption:',
                 timestamp)
    logging.info(stdout)
2168
2169
def _run_paygen_with_skylab(options, override_pool, override_qs_account):
    """Run paygen suites with skylab.

    Creates one skylab test per paygen test and logs each created task.
    Stops at the first command failure.

    @param options Parsed options.
    @param override_pool Pool to use instead of the one derived from
                         options.pool; ignored when empty.
    @param override_qs_account Account value handed through to
                               paygen.paygen_skylab_args().

    @return SuiteResult with INFRA_FAILURE if a create-test command
            fails, otherwise SuiteResult with OK.
    """
    builds = suite_common.make_builds_from_options(options)
    skylab_tool = os.environ.get('SKYLAB_TOOL') or _SKYLAB_TOOL
    test_source_build = suite_common.get_test_source_build(builds)
    if override_pool:
        pool = override_pool
    else:
        pool = 'DUT_POOL_%s' % options.pool.upper()

    for test in paygen.get_paygen_tests(test_source_build, options.name):
        cmd = [skylab_tool, 'create-test', '-bb=False']
        cmd.extend(paygen.paygen_skylab_args(
                test, options.name, test_source_build, pool, options.board,
                options.model, options.timeout_mins,
                override_qs_account, _SKYLAB_SERVICE_ACCOUNT))
        job_created_on = time.time()
        try:
            res = cros_build_lib.RunCommand(cmd, capture_output=True)
        except cros_build_lib.RunCommandError as e:
            logging.error(str(e))
            return run_suite_common.SuiteResult(
                    run_suite_common.RETURN_CODES.INFRA_FAILURE)

        logging.info(res.output)
        # The job URL is the last whitespace-separated token of the
        # output, and the id is whatever follows 'id=' in that URL.
        job_url = res.output.split()[-1]
        job_id = job_url.split('id=')[-1]
        job_timer = diagnosis_utils.JobTimer(
                job_created_on, float(options.timeout_mins))
        _log_create_task(job_timer, job_url, job_id)

    return run_suite_common.SuiteResult(run_suite_common.RETURN_CODES.OK)
2200
2201
def _run_with_skylab(options, override_pool, override_qs_account):
    """Run suite inside skylab.

    Paygen suites are delegated to _run_paygen_with_skylab(). Otherwise,
    when options.mock_job_id is set the existing task is waited on and its
    child results decide the return code; when it is not set a new suite
    is created and OK is returned once the creation command succeeds.

    @param options Parsed options.
    @param override_pool Pool to use instead of options.pool; ignored when
                         empty.
    @param override_qs_account Value for the -qs-account flag; the flag is
                               omitted when empty.

    @return SuiteResult. INFRA_FAILURE is returned whenever the skylab
            command itself fails.
    """
    if paygen.is_paygen_suite(options.name):
        return _run_paygen_with_skylab(options, override_pool,
                                       override_qs_account)

    builds = suite_common.make_builds_from_options(options)
    skylab_tool = os.environ.get('SKYLAB_TOOL') or _SKYLAB_TOOL
    pool = override_pool or options.pool
    if options.mock_job_id:
        taskID = options.mock_job_id
        cmd = [skylab_tool, 'wait-task', '-bb=False',
               '-timeout-mins', str(options.timeout_mins),
               '-service-account-json', _SKYLAB_SERVICE_ACCOUNT,
               taskID]
        try:
            res = cros_build_lib.RunCommand(cmd, capture_output=True)
        except cros_build_lib.RunCommandError as e:
            logging.error(str(e))
            return run_suite_common.SuiteResult(
                    run_suite_common.RETURN_CODES.INFRA_FAILURE)

        output = json.loads(res.output)
        child_tasks = output['child-results']
        task_stdout = output['stdout']

        return_code = _get_skylab_suite_result(child_tasks)
        _log_skylab_for_buildbot(task_stdout)
        return run_suite_common.SuiteResult(return_code)

    cmd = [skylab_tool, 'create-suite', '-bb=False',
           '-board', options.board,
           '-image', builds[provision.CROS_VERSION_PREFIX],
           '-pool', pool,
           '-timeout-mins', str(options.timeout_mins),
           '-priority', str(skylab_priority_for(options.priority)),
           '-service-account-json', _SKYLAB_SERVICE_ACCOUNT]
    if override_qs_account:
        cmd.extend(['-qs-account', override_qs_account])

    if options.max_retries is not None:
        cmd.extend(['-max-retries', str(options.max_retries)])

    if options.model is not None:
        cmd.extend(['-model', options.model])

    tags = ['skylab:run_suite_trampoline']
    for t in tags:
        cmd.extend(['-tag', t])

    unsupported_skylab_keyvals = ['datastore_parent_key']
    if options.job_keyvals is not None:
        # items() iterates the same pairs as iteritems() and exists on
        # both Python 2 and Python 3 dicts.
        for k, v in options.job_keyvals.items():
            if k in unsupported_skylab_keyvals:
                continue

            cmd.extend(['-keyval', '%s:%s' % (k, v)])

    cmd.extend([options.name])
    job_created_on = time.time()
    # Report a failing create-suite command the same way as a failing
    # wait-task or create-test command, instead of letting the exception
    # escape to the caller.
    try:
        res = cros_build_lib.RunCommand(cmd, capture_output=True)
    except cros_build_lib.RunCommandError as e:
        logging.error(str(e))
        return run_suite_common.SuiteResult(
                run_suite_common.RETURN_CODES.INFRA_FAILURE)

    # TODO (xixuan): The parsing will change with crbug.com/935244.
    logging.info(res.output)
    job_url = res.output.split()[-1]
    job_id = job_url.split('id=')[-1]
    job_timer = diagnosis_utils.JobTimer(
            job_created_on, float(options.timeout_mins))
    _log_create_task(job_timer, job_url, job_id)
    return run_suite_common.SuiteResult(run_suite_common.RETURN_CODES.OK)
2271
Xixuan Wuc4d33662019-03-18 14:07:15 -07002272
def _run_with_autotest(options):
    """Run suite inside autotest.

    When options.pre_check is set and _should_run() rejects the suite, an
    ERROR result is produced without running anything. The result dict is
    dumped as JSON when either JSON option is set.

    @param options Parsed options.

    @return SuiteResult of the run (or of the skipped run).
    """
    if not options.pre_check or _should_run(options):
        result = _run_task(options)
    else:
        logging.info('Suite %s-%s is terminated: Lab is closed, OR build is '
                     'blocked, OR this suite has already been kicked off '
                     'once in past %d days.',
                     options.test_source_build, options.name,
                     _SEARCH_JOB_MAX_DAYS)
        result = run_suite_common.SuiteResult(
                run_suite_common.RETURN_CODES.ERROR,
                {'return_message': ("Lab is closed OR other reason"
                                    " (see code, it's complicated)")})

    wants_json = options.json_dump or options.json_dump_postfix
    if wants_json:
        run_suite_common.dump_json(result.output_dict)

    return result
2292
2293
def main():
    """Entry point.

    Parses and validates the command line, decides whether the suite
    runs in skylab or in autotest, runs it and logs the final status.

    @return Integer exit code: 0 when options.do_nothing is set,
            otherwise the return code of the resulting SuiteResult.

    @raises ValueError if both suite_args and suite_args_json are given.
    """
    utils.verify_not_root_user()

    parser = make_parser()
    options = parser.parse_args()
    if options.do_nothing:
        return 0

    if options.suite_args_json and options.suite_args:
        raise ValueError("suite_args and suite_args_json may not both "
                         "be specified.")
    if options.suite_args_json:
        options.suite_args = options.suite_args_json

    # sys.excepthook is the attribute the interpreter consults for
    # uncaught exceptions; assigning the handler to any other attribute
    # of sys leaves it uninstalled.
    sys.excepthook = _ExceptionHandler(dump_json=options.json_dump)
    if options.json_dump:
        logging.disable(logging.CRITICAL)

    options_okay = verify_and_clean_options(options)
    # Set StreamHandler first to capture error messages if suite is not run.
    utils.setup_logging()
    if not options_okay:
        parser.print_help()
        result = run_suite_common.SuiteResult(
                run_suite_common.RETURN_CODES.INVALID_OPTIONS)
    else:
        try:
            is_skylab, ovrd_pool, ovrd_qs_account = _if_run_in_skylab(options)
        except Exception as e:
            # Deliberately broad: any trampoline problem must not stop the
            # suite from running in autotest.
            logging.exception(str(e))
            logging.info('fall back to Autotest due to trampoline errors')
            is_skylab = False
            ovrd_pool = ''
            ovrd_qs_account = ''

        if is_skylab:
            result = _run_with_skylab(options, ovrd_pool, ovrd_qs_account)
        else:
            result = _run_with_autotest(options)

    logging.info('Will return from run_suite with status: %s',
                 run_suite_common.RETURN_CODES.get_string(result.return_code))
    return result.return_code
2338
2339
# Script entry: the value returned by main() becomes the process exit code.
if __name__ == '__main__':
    sys.exit(main())