blob: 50c08c05b3c9ffa703282b934ee0df9b25e8588f [file] [log] [blame]
Xixuan Wuc7bf77c2018-04-24 12:05:40 -07001# Copyright 2018 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5"""Module for CrOS dynamic test suite generation and execution."""
6
7from __future__ import absolute_import
8from __future__ import division
9from __future__ import print_function
10
Xixuan Wu80795c82018-06-12 11:56:17 -070011import contextlib
12import itertools
Xixuan Wu0bea9522018-05-08 17:49:19 -070013import json
Xixuan Wu6c041332018-05-07 16:04:36 -070014import logging
Xixuan Wue71c8932018-05-07 17:18:34 -070015import os
Aviv Keshetf0951212019-03-18 14:54:32 -070016import re
Xixuan Wu0bea9522018-05-08 17:49:19 -070017import time
Xixuan Wu6c041332018-05-07 16:04:36 -070018
Xixuan Wue71c8932018-05-07 17:18:34 -070019from lucifer import autotest
Xixuan Wu9af95a22018-05-18 10:46:42 -070020from skylab_suite import cros_suite
Xixuan Wu0bea9522018-05-08 17:49:19 -070021from skylab_suite import swarming_lib
Xixuan Wuc7bf77c2018-04-24 12:05:40 -070022
Xixuan Wu0bea9522018-05-08 17:49:19 -070023
SKYLAB_DRONE_SWARMING_WORKER = '/opt/infra-tools/skylab_swarming_worker'
SKYLAB_SUITE_USER = 'skylab_suite_runner'
# Default location of the skylab tool; the SKYLAB_TOOL environment variable
# takes precedence when a test task is created.
SKYLAB_TOOL = '/opt/infra-tools/skylab'

# Seconds to sleep between two rounds of polling child tasks.
SUITE_WAIT_SLEEP_INTERVAL_SECONDS = 30

# Test dependencies that are dropped (with a warning) before dimensions are
# computed. See #5 in crbug.com/873886 for more details.
_NOT_SUPPORTED_DEPENDENCIES = [
    'skip_provision',
    'cleanup-reboot',
    'rpm',
    'modem_repair',
]
33
Xixuan Wue71c8932018-05-07 17:18:34 -070034
def run(client, test_specs, suite_handler, dry_run=False):
    """Start a CrOS dynamic test suite, or pick up an existing one.

    When suite_handler already carries a suite id, the suite is resumed;
    otherwise a new suite is created.

    @param client: A swarming_lib.Client instance.
    @param test_specs: A list of cros_suite.TestSpec objects.
    @param suite_handler: A cros_suite.SuiteHandler object.
    @param dry_run: Whether to kick off dry runs of the tests.
    """
    assert isinstance(client, swarming_lib.Client)
    if not suite_handler.suite_id:
        # No suite id yet: this is a brand new suite.
        _run_suite(test_specs, suite_handler, dry_run)
        return

    # A suite id is already known: continue that suite.
    _resume_suite(client, test_specs, suite_handler, dry_run)
Xixuan Wuc7430712018-07-10 12:04:34 -070050
51
def _resume_suite(client, test_specs, suite_handler, dry_run=False):
    """Resume the suite identified by suite_handler.suite_id.

    Child tasks that already exist are re-registered with the handler, the
    remaining test specs are scheduled, and the results are optionally
    waited for.

    @param client: A swarming_lib.Client instance.
    @param test_specs: A list of cros_suite.TestSpec objects.
    @param suite_handler: A cros_suite.SuiteHandler object.
    @param dry_run: Whether to kick off dry runs of the tests.
    """
    assert isinstance(client, swarming_lib.Client)
    suite_id = suite_handler.suite_id
    child_tasks = client.get_child_tasks(suite_id)
    pending_specs = _get_unscheduled_test_specs(
            test_specs, suite_handler, child_tasks)

    logging.info('Not yet scheduled test_specs: %r', pending_specs)
    _create_test_tasks(pending_specs, suite_handler, suite_id, dry_run)

    if suite_id is None:
        return
    if suite_handler.should_wait():
        _wait_for_results(suite_handler, dry_run=dry_run)
65
66
def _get_unscheduled_test_specs(test_specs, suite_handler, all_tasks):
    """Split test specs into already-scheduled and not-yet-scheduled ones.

    A spec with at least one matching task in all_tasks is registered with
    suite_handler under its current (or, failing that, first listed) task
    id. Specs without any matching task are returned to the caller.

    @param test_specs: A list of cros_suite.TestSpec objects.
    @param suite_handler: A cros_suite.SuiteHandler object.
    @param all_tasks: A list of task dicts of the suite's child tasks.

    @return the list of test specs that have no matching task.
    """
    unscheduled = []
    for spec in test_specs:
        if suite_handler.is_provision():
            # We cannot check bot_id because pending tasks do not have it
            # yet, so match on the tag instead.
            wanted_tag = 'id:%s' % spec.bot_id
            matching = [task for task in all_tasks
                        if wanted_tag in task['tags']]
        else:
            matching = [task for task in all_tasks
                        if task['name'] == spec.test.name]

        if not matching:
            unscheduled.append(spec)
            continue

        running = _get_current_task(matching)
        if running:
            active_id = running['task_id']
        else:
            active_id = matching[0]['task_id']

        # Every matching task counts as one consumed attempt.
        retries_left = spec.test.job_retries - len(matching)
        earlier_ids = [task['task_id'] for task in matching
                       if task['task_id'] != active_id]
        handler_spec = cros_suite.TestHandlerSpec(
                test_spec=spec,
                remaining_retries=retries_left,
                previous_retried_ids=earlier_ids)
        suite_handler.add_test_by_task_id(active_id, handler_spec)

    return unscheduled
95
96
def _get_current_task(tasks):
    """Find the single task that has not finished yet.

    @param tasks: A list of task dicts including task_id, state, etc.

    @return the dict of the only task whose state is not in
            swarming_lib.TASK_FINISHED_STATUS, or None if all are finished.

    @raises ValueError: if more than one task is unfinished.
    """
    running = None
    for task in tasks:
        if task['state'] in swarming_lib.TASK_FINISHED_STATUS:
            continue

        if running:
            raise ValueError(
                    'Parent task has 2 same running child tasks: %s, %s'
                    % (running['task_id'], task['task_id']))

        running = task

    return running
115
116
def _run_suite(test_specs, suite_handler, dry_run=False):
    """Create a new suite whose id is this process's own swarming task id.

    @param test_specs: A list of cros_suite.TestSpec objects.
    @param suite_handler: A cros_suite.SuiteHandler object.
    @param dry_run: Whether to kick off dry runs of the tests.

    @raises ValueError: if SWARMING_TASK_ID is unset or empty.
    """
    parent_task_id = os.environ.get('SWARMING_TASK_ID')
    if not parent_task_id:
        raise ValueError("Unable to determine suite's task id from env var "
                         "SWARMING_TASK_ID.")

    _create_test_tasks(test_specs, suite_handler, parent_task_id, dry_run)
    suite_handler.set_suite_id(parent_task_id)

    if not suite_handler.should_wait():
        return
    _wait_for_results(suite_handler, dry_run=dry_run)
128
129
def _create_test_tasks(test_specs, suite_handler, suite_id, dry_run=False):
    """Schedule a task for every test spec and register it for monitoring.

    The specs are scheduled one by one, in order. Each resulting task id is
    added to suite_handler together with a TestHandlerSpec that has one
    retry already consumed and no previously retried ids.

    @param test_specs: A list of cros_suite.TestSpec objects to schedule.
    @param suite_handler: A cros_suite.SuiteHandler object to monitor the
                          test_specs' progress.
    @param suite_id: A string ID for a suite task, it's the parent task id
                     for these to-be-scheduled test_specs.
    @param dry_run: Whether to kick off dry runs of the tests.
    """
    for spec in test_specs:
        task_id = _create_test_task(spec, suite_id=suite_id, dry_run=dry_run)
        handler_spec = cros_suite.TestHandlerSpec(
                test_spec=spec,
                remaining_retries=spec.test.job_retries - 1,
                previous_retried_ids=[])
        suite_handler.add_test_by_task_id(task_id, handler_spec)
Xixuan Wu56424bc2018-05-15 11:03:27 -0700155
Xixuan Wu56424bc2018-05-15 11:03:27 -0700156
def _create_test_task(test_spec, suite_id=None, dry_run=False):
    """Create a test task for a given test spec.

    Assembles a `create-test` invocation of the skylab tool from the test
    spec, runs it, and extracts the new task's id from the tool's output.

    @param test_spec: A cros_suite.TestSpec object.
    @param suite_id: the suite task id of the test.
    @param dry_run: If true, don't actually create task.

    @return the swarming task id of this task, or None when dry_run is set.

    @raises ValueError: if no task id can be found in the tool's output.
    """
    logging.info('Creating task for test %s', test_spec.test.name)
    # The SKYLAB_TOOL environment variable overrides the default tool path.
    skylab_tool_path = os.environ.get('SKYLAB_TOOL', SKYLAB_TOOL)

    cmd = [
        skylab_tool_path, 'create-test',
        '-board', test_spec.board,
        '-image', test_spec.build,
        '-service-account-json', os.environ['SWARMING_CREDS'],
        '-timeout-mins', str(test_spec.execution_timeout_mins),
    ]
    if _is_dev():
        cmd += ['-dev']
    if test_spec.pool:
        # TODO(akeshet): Clean up this hack around pool name translation.
        autotest_pool_label = 'pool:%s' % test_spec.pool
        pool_dependency_value = swarming_lib.task_dependencies_from_labels(
                [autotest_pool_label])['label-pool']
        cmd += ['-pool', pool_dependency_value]

    if test_spec.model:
        cmd += ['-model', test_spec.model]
    if test_spec.quota_account:
        cmd += ['-qs-account', test_spec.quota_account]
    if test_spec.test.test_type.lower() == 'client':
        cmd += ['-client-test']

    tags = _compute_tags(test_spec.build, suite_id)
    dimensions = _compute_dimensions(
            test_spec.bot_id, test_spec.test.dependencies)
    keyvals_flat = _compute_job_keyvals_flat(test_spec.keyvals, suite_id)

    for tag in tags:
        cmd += ['-tag', tag]
    for keyval in keyvals_flat:
        cmd += ['-keyval', keyval]
    # The test name and the dimensions are positional and go after all flags.
    cmd += [test_spec.test.name]
    cmd += dimensions

    if dry_run:
        logging.info('Would have created task with command %s', cmd)
        return None

    # TODO(akeshet): Avoid this late chromite import.
    cros_build_lib = autotest.chromite_load('cros_build_lib')
    result = cros_build_lib.RunCommand(cmd, capture_output=True)
    # TODO(akeshet): Use -json flag and json-parse output of the command
    # instead of regex matching to determine task_id.
    # re.search gives the same result as re.match for single-line output, and
    # additionally tolerates extra lines printed before the line with the id.
    m = re.search(r'.*id=(.*)$', result.output)
    if m is None:
        # Fail with a readable error rather than an AttributeError on None.
        raise ValueError(
                'Could not find a task id in skylab tool output: %r'
                % (result.output,))
    task_id = m.group(1)
    logging.info('Created task with id %s', task_id)
    return task_id
Xixuan Wu3dea7cf2018-12-10 17:50:45 -0800218
Aviv Keshetf0951212019-03-18 14:54:32 -0700219
# TODO(akeshet): Eliminate the need for this, by either adding an explicit
# swarming_server argument to skylab tool, or having the tool respect the
# SWARMING_SERVER environment variable. See crbug.com/948774
def _is_dev():
    """Tell whether the skylab tool has to be called with the -dev flag.

    @return True if the SWARMING_SERVER environment variable contains
            'chromium-swarm-dev', False otherwise.

    @raises KeyError: if SWARMING_SERVER is not set.
    """
    swarming_server = os.environ['SWARMING_SERVER']
    return 'chromium-swarm-dev' in swarming_server
Aviv Keshetb70c7092019-04-02 12:05:56 -0700226
def _compute_tags(build, suite_id):
    """Build the list of tags for a test task.

    @param build: The build, recorded as a 'build:' tag.
    @param suite_id: The parent suite's task id, or None for no parent tag.

    @return a list of tag strings.
    """
    tags = ['build:%s' % build]
    if suite_id is None:
        return tags
    tags.append('parent_task_id:%s' % suite_id)
    return tags
234
235
def _compute_dimensions(bot_id, dependencies):
    """Build the list of 'key:value' dimensions for a test task.

    @param bot_id: The id of the bot to run on; falsy for no 'id:' dimension.
    @param dependencies: An iterable of test dependency labels.

    @return a list of dimension strings: the optional 'id:' entry first,
            followed by the sorted translated dependencies.
    """
    dimensions = ['id:%s' % bot_id] if bot_id else []
    supported = _filter_unsupported_dependencies(dependencies)
    swarming_deps = swarming_lib.task_dependencies_from_labels(supported)
    dimensions.extend(sorted(
            '%s:%s' % (key, value) for key, value in swarming_deps.items()))
    return dimensions
247
248
def _compute_job_keyvals_flat(keyvals, suite_id):
    """Flatten job keyvals into a sorted list of 'key:value' strings.

    The given keyvals dict is copied, not modified.

    @param keyvals: A dict of job keyvals.
    @param suite_id: The parent suite's task id, recorded under the
                     PARENT_JOB_ID key unless it is None.

    @return a sorted list of 'key:value' strings.
    """
    job_keyvals = keyvals.copy()
    if suite_id is not None:
        # TODO(akeshet): Avoid this late autotest constants import.
        constants = autotest.load('server.cros.dynamic_suite.constants')
        job_keyvals[constants.PARENT_JOB_ID] = suite_id

    flattened = ['%s:%s' % (key, value)
                 for key, value in job_keyvals.items()]
    flattened.sort()
    return flattened
259
260
def _filter_unsupported_dependencies(dependencies):
    """Drop dependencies that Skylab does not support, warning about each.

    @param dependencies: An iterable of test dependency labels.

    @return a list of the dependencies that are not listed in
            _NOT_SUPPORTED_DEPENDENCIES, in their original order.
    """
    supported = []
    for dependency in dependencies:
        if dependency not in _NOT_SUPPORTED_DEPENDENCIES:
            supported.append(dependency)
            continue
        logging.warning('Dependency %s is not supported in skylab',
                        dependency)
    return supported
Xixuan Wu0bea9522018-05-08 17:49:19 -0700270
271
@contextlib.contextmanager
def disable_logging(logging_level):
    """Context manager for disabling logging of a given logging level.

    Inside the context, logging.disable(logging_level) is in effect. On
    exit, including exit by exception, the threshold that was active before
    entering is put back, so nested or pre-existing logging.disable()
    settings are not lost.

    @param logging_level: The level passed to logging.disable().
    """
    # The logging module offers no public getter for the current threshold;
    # logging.disable() stores it on the root logger's manager.
    previous_level = logging.root.manager.disable
    logging.disable(logging_level)
    try:
        yield
    finally:
        logging.disable(previous_level)
280
281
def _loop_and_wait_forever(suite_handler, dry_run):
    """Poll the suite's child tasks until the handler is done waiting.

    Each round handles results, stops if the handler reports it finished
    waiting, otherwise re-schedules the tasks the handler marked for retry
    and sleeps before the next round.

    @param suite_handler: a cros_suite.SuiteHandler object.
    @param dry_run: Whether retried tests are kicked off as dry runs.
    """
    slept_seconds = 0
    while True:
        # Log progress every 300 seconds; stay below INFO in between.
        quiet = slept_seconds % 300 != 0
        level = logging.INFO if quiet else logging.NOTSET
        with disable_logging(level):
            suite_handler.handle_results(suite_handler.suite_id)
            if suite_handler.is_finished_waiting():
                break

        for task in suite_handler.retried_tasks:
            _retry_test(suite_handler, task['task_id'], dry_run=dry_run)

        time.sleep(SUITE_WAIT_SLEEP_INTERVAL_SECONDS)
        slept_seconds += SUITE_WAIT_SLEEP_INTERVAL_SECONDS
Xixuan Wu80795c82018-06-12 11:56:17 -0700296
297
def _wait_for_results(suite_handler, dry_run=False):
    """Wait for child tasks to finish, bounded by the suite's timeout.

    The time budget is the handler's timeout_mins minus its passed_mins. A
    timeout is logged as an error and otherwise swallowed.

    @param suite_handler: a cros_suite.SuiteHandler object.
    @param dry_run: Whether retried tests are kicked off as dry runs.
    """
    timeout_util = autotest.chromite_load('timeout_util')
    remaining_seconds = (suite_handler.timeout_mins * 60 -
                         suite_handler.passed_mins * 60)
    try:
        with timeout_util.Timeout(remaining_seconds):
            _loop_and_wait_forever(suite_handler, dry_run)
    except timeout_util.TimeoutError:
        logging.error('Timeout in waiting for child tasks.')
    else:
        logging.info('Finished to wait for child tasks.')
Xixuan Wu56424bc2018-05-15 11:03:27 -0700313
314
def _retry_test(suite_handler, task_id, dry_run=False):
    """Retry test for a suite.

    The following actions are executed for retrying a test:
        1. Schedule the test again.
        2. Add the test with the new swarming task id to the suite
           handler, with its remaining retries reduced by 1.
        3. Reduce the suite-level max retries by 1.
        4. Remove the previous failed test from the suite handler since
           it's not actively monitored by the suite anymore.

    @param suite_handler: a cros_suite.SuiteHandler object.
    @param task_id: The swarming task id for the retried test.
    @param dry_run: Whether to retry a dry run of the test.
    """
    failed_spec = suite_handler.get_test_by_task_id(task_id)
    retries_left = failed_spec.remaining_retries - 1
    logging.info('Retrying test %s, remaining %d retries.',
                 failed_spec.test_spec.test.name,
                 retries_left)

    new_task_id = _create_test_task(
            failed_spec.test_spec,
            suite_id=suite_handler.suite_id,
            dry_run=dry_run)

    # The task being replaced joins the history of retried ids.
    retried_ids = failed_spec.previous_retried_ids + [task_id]
    replacement = cros_suite.TestHandlerSpec(
            test_spec=failed_spec.test_spec,
            remaining_retries=retries_left,
            previous_retried_ids=retried_ids)
    suite_handler.add_test_by_task_id(new_task_id, replacement)

    suite_handler.set_max_retries(suite_handler.max_retries - 1)
    suite_handler.remove_test_by_task_id(task_id)
Xixuan Wu7a450c52018-07-20 15:07:51 -0700347
348
def _convert_dict_to_string(input_dict):
    """Convert dictionary to a JSON string with all values stringified.

    Every non-dict value is passed through str(). Every nested dict is
    itself converted to a JSON string by a recursive call, so it ends up as
    a string value in the result. The input dictionary and any dicts nested
    in it are left unmodified.

    @param input_dict: A dictionary.

    @return a JSON string of a flat object whose values are all strings.
    """
    stringified = {}
    # items() works on both Python 2 and 3, unlike iteritems().
    for key, value in input_dict.items():
        if isinstance(value, dict):
            stringified[key] = _convert_dict_to_string(value)
        else:
            stringified[key] = str(value)

    return json.dumps(stringified)
360 return json.dumps(input_dict)