blob: 6d8403007f94384e6a3f3999242d9fab68bc33fb [file] [log] [blame]
Xixuan Wuc7bf77c2018-04-24 12:05:40 -07001# Copyright 2018 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5"""Module for CrOS dynamic test suite generation and execution."""
6
7from __future__ import absolute_import
8from __future__ import division
9from __future__ import print_function
10
Xixuan Wu80795c82018-06-12 11:56:17 -070011import contextlib
12import itertools
Xixuan Wu0bea9522018-05-08 17:49:19 -070013import json
Xixuan Wu6c041332018-05-07 16:04:36 -070014import logging
Xixuan Wue71c8932018-05-07 17:18:34 -070015import os
Aviv Keshetf0951212019-03-18 14:54:32 -070016import re
Xixuan Wu0bea9522018-05-08 17:49:19 -070017import time
Xixuan Wu6c041332018-05-07 16:04:36 -070018
Xixuan Wue71c8932018-05-07 17:18:34 -070019from lucifer import autotest
Xixuan Wu9af95a22018-05-18 10:46:42 -070020from skylab_suite import cros_suite
Xixuan Wu0bea9522018-05-08 17:49:19 -070021from skylab_suite import swarming_lib
Xixuan Wuc7bf77c2018-04-24 12:05:40 -070022
Xixuan Wu0bea9522018-05-08 17:49:19 -070023
# Filesystem path of skylab_swarming_worker. Not referenced by the functions
# below; presumably consumed by other modules -- TODO confirm.
SKYLAB_DRONE_SWARMING_WORKER = '/opt/infra-tools/skylab_swarming_worker'
# Not referenced by the functions below; presumably the user name recorded
# for tasks created by the suite runner -- TODO confirm.
SKYLAB_SUITE_USER = 'skylab_suite_runner'
# Default path of the skylab command line tool. _create_test_task lets the
# SKYLAB_TOOL environment variable override it.
SKYLAB_TOOL = '/opt/infra-tools/skylab'

# Seconds _loop_and_wait_forever sleeps between two result-handling rounds.
SUITE_WAIT_SLEEP_INTERVAL_SECONDS = 30

# Test dependencies that _filter_unsupported_dependencies drops (with a
# warning) before swarming dimensions are computed.
# See #5 in crbug.com/873886 for more details.
_NOT_SUPPORTED_DEPENDENCIES = ['skip_provision', 'cleanup-reboot', 'rpm',
                               'modem_repair']
33
Xixuan Wue71c8932018-05-07 17:18:34 -070034
def run(client, test_specs, suite_handler, dry_run=False):
    """Run a CrOS dynamic test suite.

    A suite handler that already carries a suite id is resumed; otherwise a
    brand new suite is started.

    @param client: A swarming_lib.Client instance.
    @param test_specs: A list of cros_suite.TestSpec objects.
    @param suite_handler: A cros_suite.SuiteHandler object.
    @param dry_run: Whether to kick off dry runs of the tests.
    """
    assert isinstance(client, swarming_lib.Client)
    if not suite_handler.suite_id:
        # No suite id yet: make a new suite.
        _run_suite(test_specs, suite_handler, dry_run)
        return

    # A suite id is already known: resume that suite.
    _resume_suite(client, test_specs, suite_handler, dry_run)
Xixuan Wuc7430712018-07-10 12:04:34 -070050
51
def _resume_suite(client, test_specs, suite_handler, dry_run=False):
    """Resume a suite and its child tasks by given suite id.

    Test specs that already have child tasks are re-registered with the
    suite handler; the remaining ones are scheduled as new tasks.

    @param client: A swarming_lib.Client instance.
    @param test_specs: A list of cros_suite.TestSpec objects.
    @param suite_handler: A cros_suite.SuiteHandler object holding the id
                          of the suite to resume.
    @param dry_run: Whether to kick off dry runs of the tests.
    """
    assert isinstance(client, swarming_lib.Client)
    suite_id = suite_handler.suite_id
    child_tasks = client.get_child_tasks(suite_id)
    pending_specs = _get_unscheduled_test_specs(
            test_specs, suite_handler, child_tasks)

    logging.info('Not yet scheduled test_specs: %r', pending_specs)
    _create_test_tasks(pending_specs, suite_handler, suite_id, dry_run)

    if suite_id is None or not suite_handler.should_wait():
        return

    _wait_for_results(suite_handler, dry_run=dry_run)
65
66
def _get_unscheduled_test_specs(test_specs, suite_handler, all_tasks):
    """Find the test specs that have no child task yet.

    Every test spec that does have matching tasks is registered with
    suite_handler under the id of its unfinished task, or of the first
    matching task when none is unfinished.

    @param test_specs: A list of cros_suite.TestSpec objects.
    @param suite_handler: A cros_suite.SuiteHandler object.
    @param all_tasks: A list of task dicts of the suite's child tasks.

    @return the list of test specs without any matching task.
    """
    unscheduled = []
    for spec in test_specs:
        if suite_handler.is_provision():
            # We cannot check bot_id because pending tasks do not have it yet.
            wanted_tag = 'id:%s' % spec.bot_id
            matching = [task for task in all_tasks
                        if wanted_tag in task['tags']]
        else:
            matching = [task for task in all_tasks
                        if task['name'] == spec.test.name]

        if not matching:
            unscheduled.append(spec)
            continue

        running_task = _get_current_task(matching)
        tracked_id = (running_task or matching[0])['task_id']
        handler_spec = cros_suite.TestHandlerSpec(
                test_spec=spec,
                remaining_retries=spec.test.job_retries - len(matching),
                previous_retried_ids=[task['task_id'] for task in matching
                                      if task['task_id'] != tracked_id])
        suite_handler.add_test_by_task_id(tracked_id, handler_spec)

    return unscheduled
95
96
def _get_current_task(tasks):
    """Get current running task.

    A task counts as running when its state is not one of
    swarming_lib.TASK_FINISHED_STATUS.

    @param tasks: A list of task dicts including task_id, state, etc.

    @return a dict representing the current running task, or None if every
            task is finished.

    @raises ValueError if more than one task is still running.
    """
    unfinished = (task for task in tasks
                  if task['state'] not in swarming_lib.TASK_FINISHED_STATUS)
    current_task = next(unfinished, None)
    duplicate = next(unfinished, None)
    if duplicate is not None:
        raise ValueError(
                'Parent task has 2 same running child tasks: %s, %s'
                % (current_task['task_id'], duplicate['task_id']))

    return current_task
115
116
def _run_suite(test_specs, suite_handler, dry_run=False):
    """Make a new suite.

    The suite id is taken from the SWARMING_TASK_ID environment variable.

    @param test_specs: A list of cros_suite.TestSpec objects to schedule.
    @param suite_handler: A cros_suite.SuiteHandler object.
    @param dry_run: Whether to kick off dry runs of the tests.

    @raises ValueError if SWARMING_TASK_ID is unset or empty.
    """
    suite_id = os.getenv('SWARMING_TASK_ID')
    if not suite_id:
        raise ValueError("Unable to determine suite's task id "
                         "from env var SWARMING_TASK_ID.")

    _create_test_tasks(test_specs, suite_handler, suite_id, dry_run)
    suite_handler.set_suite_id(suite_id)

    if not suite_handler.should_wait():
        return

    _wait_for_results(suite_handler, dry_run=dry_run)
128
129
def _create_test_tasks(test_specs, suite_handler, suite_id, dry_run=False):
    """Create test tasks for a list of tests (TestSpecs).

    Each TestSpec is scheduled on swarming in turn and then registered in
    suite_handler's swarming_task_id-to-test map so that the suite keeps
    monitoring it.

    @param test_specs: A list of cros_suite.TestSpec objects to schedule.
    @param suite_handler: A cros_suite.SuiteHandler object to monitor the
                          test_specs' progress.
    @param suite_id: A string ID for a suite task, it's the parent task id for
                     these to-be-scheduled test_specs.
    @param dry_run: Whether to kick off dry runs of the tests.
    """
    for spec in test_specs:
        new_task_id = _create_test_task(
                spec,
                suite_id=suite_id,
                is_provision=suite_handler.is_provision(),
                dry_run=dry_run)
        # The initial scheduling consumes one of the test's job retries.
        handler_spec = cros_suite.TestHandlerSpec(
                test_spec=spec,
                remaining_retries=spec.test.job_retries - 1,
                previous_retried_ids=[])
        suite_handler.add_test_by_task_id(new_task_id, handler_spec)
Xixuan Wu56424bc2018-05-15 11:03:27 -0700156
Xixuan Wu56424bc2018-05-15 11:03:27 -0700157
def _create_test_task(test_spec, suite_id=None,
                      is_provision=False, dry_run=False):
    """Create a test task for a given test spec.

    The task is created by running the skylab tool's 'create-test'
    subcommand. The tool path defaults to SKYLAB_TOOL and can be overridden
    with the SKYLAB_TOOL environment variable.

    @param test_spec: A cros_suite.TestSpec object.
    @param suite_id: the suite task id of the test.
    @param is_provision: Not used by this function; accepted so that callers
                         can keep passing it.
    @param dry_run: If true, don't actually create task.

    @return the swarming task id of this task, or None for a dry run.

    @raises KeyError if the SWARMING_CREDS environment variable is unset.
    @raises ValueError if no task id can be found in the tool's output.
    """
    logging.info('Creating task for test %s', test_spec.test.name)
    skylab_tool_path = os.environ.get('SKYLAB_TOOL', SKYLAB_TOOL)

    cmd = [
        skylab_tool_path, 'create-test',
        '-board', test_spec.board,
        '-image', test_spec.build,
        '-service-account-json', os.environ['SWARMING_CREDS'],
        '-timeout-mins', str(test_spec.execution_timeout_mins),
    ]
    if _is_dev():
        cmd += ['-dev']
    if test_spec.pool:
        # TODO(akeshet): Clean up this hack around pool name translation.
        autotest_pool_label = 'pool:%s' % test_spec.pool
        pool_dependency_value = swarming_lib.task_dependencies_from_labels(
                [autotest_pool_label])['label-pool']
        cmd += ['-pool', pool_dependency_value]

    if test_spec.model:
        cmd += ['-model', test_spec.model]
    if test_spec.quota_account:
        cmd += ['-qs-account', test_spec.quota_account]
    if test_spec.test.test_type.lower() == 'client':
        cmd += ['-client-test']

    tags = _compute_tags(test_spec.build, suite_id)
    dimensions = _compute_dimensions(
            test_spec.bot_id, test_spec.test.dependencies)
    keyvals_flat = _compute_job_keyvals_flat(test_spec.keyvals, suite_id)

    for tag in tags:
        cmd += ['-tag', tag]
    for keyval in keyvals_flat:
        cmd += ['-keyval', keyval]
    # The test name and then the dimensions go last, as positional arguments.
    cmd += [test_spec.test.name]
    cmd += dimensions

    if dry_run:
        logging.info('Would have created task with command %s', cmd)
        return None

    # TODO(akeshet): Avoid this late chromite import.
    cros_build_lib = autotest.chromite_load('cros_build_lib')
    result = cros_build_lib.RunCommand(cmd, capture_output=True)
    # TODO(akeshet): Use -json flag and json-parse output of the command instead
    # of regex matching to determine task_id.
    m = re.match(r'.*id=(.*)$', result.output)
    if m is None:
        # Fail with the offending output instead of an opaque AttributeError
        # from calling .group() on None.
        raise ValueError(
                'Unable to find task id in skylab tool output: %r'
                % (result.output,))
    task_id = m.group(1)
    logging.info('Created task with id %s', task_id)
    return task_id
Xixuan Wu3dea7cf2018-12-10 17:50:45 -0800220
Aviv Keshetf0951212019-03-18 14:54:32 -0700221
# TODO(akeshet): Eliminate the need for this, by either adding an explicit
# swarming_server argument to skylab tool, or having the tool respect the
# SWARMING_SERVER environment variable. See crbug.com/948774
def _is_dev():
    """Detect whether skylab tool should be invoked with -dev flag.

    @return True if the SWARMING_SERVER environment variable contains
            'chromium-swarm-dev', False otherwise.

    @raises KeyError if SWARMING_SERVER is not set.
    """
    swarming_server = os.environ['SWARMING_SERVER']
    return 'chromium-swarm-dev' in swarming_server
Aviv Keshetb70c7092019-04-02 12:05:56 -0700228
def _compute_tags(build, suite_id):
    """Compute the tags of a test task.

    @param build: The build the test runs against.
    @param suite_id: The parent suite task id, or None if there is none.

    @return a list with the build tag, followed by the parent task tag
            unless suite_id is None.
    """
    build_tag = 'build:%s' % build
    if suite_id is None:
        return [build_tag]
    return [build_tag, 'parent_task_id:%s' % suite_id]
236
237
def _compute_dimensions(bot_id, dependencies):
    """Compute the swarming dimensions of a test task.

    @param bot_id: Id of the bot to pin the task to; falsy for no pinning.
    @param dependencies: The test's dependency labels.

    @return a list of 'key:value' strings: the bot id dimension first (if
            any), then the sorted dimensions derived from the supported
            dependencies.
    """
    dimensions = ['id:%s' % bot_id] if bot_id else []
    supported_deps = _filter_unsupported_dependencies(dependencies)
    swarming_deps = swarming_lib.task_dependencies_from_labels(supported_deps)
    dimensions.extend(sorted(
            '%s:%s' % (key, value) for key, value in swarming_deps.items()))
    return dimensions
249
250
def _compute_job_keyvals_flat(keyvals, suite_id):
    """Compute the flattened job keyvals of a test task.

    @param keyvals: A dict of job keyvals; it is copied, not modified.
    @param suite_id: The parent suite task id, or None. When given, it is
                     added to the keyvals under constants.PARENT_JOB_ID.

    @return a sorted list of 'key:value' strings.
    """
    merged_keyvals = keyvals.copy()
    if suite_id is not None:
        # TODO(akeshet): Avoid this late autotest constants import.
        constants = autotest.load('server.cros.dynamic_suite.constants')
        merged_keyvals[constants.PARENT_JOB_ID] = suite_id

    flattened = ['%s:%s' % pair for pair in merged_keyvals.items()]
    flattened.sort()
    return flattened
261
262
def _filter_unsupported_dependencies(dependencies):
    """Filter out Skylab-unsupported test dependencies, with a warning.

    @param dependencies: An iterable of dependency labels.

    @return a list of the dependencies that are not listed in
            _NOT_SUPPORTED_DEPENDENCIES, in their original order.
    """
    supported = []
    for dependency in dependencies:
        if dependency not in _NOT_SUPPORTED_DEPENDENCIES:
            supported.append(dependency)
            continue
        logging.warning('Dependency %s is not supported in skylab', dependency)
    return supported
Xixuan Wu0bea9522018-05-08 17:49:19 -0700272
273
@contextlib.contextmanager
def disable_logging(logging_level):
    """Context manager for disabling logging of a given logging level.

    Inside the context, logging calls of severity logging_level and below
    are disabled. On exit the threshold that was active on entry is put
    back, so nested uses and a pre-existing logging.disable() setting are
    not clobbered.

    @param logging_level: The logging level to pass to logging.disable();
                          logging.NOTSET disables nothing.
    """
    # logging offers no public getter for the current threshold; it is kept
    # on the root logger's manager.
    previous_level = logging.root.manager.disable
    logging.disable(logging_level)
    try:
        yield
    finally:
        logging.disable(previous_level)
282
283
def _loop_and_wait_forever(suite_handler, dry_run):
    """Wait for child tasks to finish or break.

    Each round asks suite_handler to handle results, schedules a retry for
    every task in suite_handler.retried_tasks and then sleeps for
    SUITE_WAIT_SLEEP_INTERVAL_SECONDS. The loop only ends once
    suite_handler.is_finished_waiting() returns true; it has no time limit
    of its own.

    @param suite_handler: a cros_suite.SuiteHandler object.
    @param dry_run: Whether retried tests are kicked off as dry runs.
    """
    for iterations in itertools.count(0):
        # Log progress every 300 seconds.
        # A non-zero remainder means the accumulated sleep time is not a
        # multiple of 300 seconds, so INFO-and-below logging is disabled for
        # this round.
        no_logging = bool(iterations * SUITE_WAIT_SLEEP_INTERVAL_SECONDS % 300)
        with disable_logging(logging.INFO if no_logging else logging.NOTSET):
            suite_handler.handle_results(suite_handler.suite_id)
            if suite_handler.is_finished_waiting():
                break

        # Reschedule every task the handler has marked for retry.
        for t in suite_handler.retried_tasks:
            _retry_test(suite_handler, t['task_id'], dry_run=dry_run)

        time.sleep(SUITE_WAIT_SLEEP_INTERVAL_SECONDS)
Xixuan Wu80795c82018-06-12 11:56:17 -0700298
299
def _wait_for_results(suite_handler, dry_run=False):
    """Wait for child tasks to finish and return their results.

    The wait is limited to the suite's timeout minus the minutes that have
    already passed; running out of time is logged as an error and is not
    raised to the caller.

    @param suite_handler: a cros_suite.SuiteHandler object.
    @param dry_run: Whether retried tests are kicked off as dry runs.
    """
    timeout_util = autotest.chromite_load('timeout_util')
    try:
        remaining_seconds = (suite_handler.timeout_mins * 60 -
                             suite_handler.passed_mins * 60)
        with timeout_util.Timeout(remaining_seconds):
            _loop_and_wait_forever(suite_handler, dry_run)
    except timeout_util.TimeoutError:
        logging.error('Timeout in waiting for child tasks.')
    else:
        logging.info('Finished to wait for child tasks.')
Xixuan Wu56424bc2018-05-15 11:03:27 -0700315
316
def _retry_test(suite_handler, task_id, dry_run=False):
    """Retry test for a suite.

    Retrying a test consists of the following steps:
        1. Schedule the test again.
        2. Register the new swarming task id with the suite's retry
           handler, with one remaining retry fewer than before.
        3. Lower the suite-level max retries by 1.
        4. Drop the previous failed test from the retry handler, since the
           suite no longer actively monitors it.

    @param suite_handler: a cros_suite.SuiteHandler object.
    @param task_id: The swarming task id for the retried test.
    @param dry_run: Whether to retry a dry run of the test.
    """
    failed_spec = suite_handler.get_test_by_task_id(task_id)
    retries_left = failed_spec.remaining_retries - 1
    logging.info('Retrying test %s, remaining %d retries.',
                 failed_spec.test_spec.test.name,
                 retries_left)

    new_task_id = _create_test_task(
            failed_spec.test_spec,
            suite_id=suite_handler.suite_id,
            is_provision=suite_handler.is_provision(),
            dry_run=dry_run)

    retry_spec = cros_suite.TestHandlerSpec(
            test_spec=failed_spec.test_spec,
            remaining_retries=retries_left,
            previous_retried_ids=failed_spec.previous_retried_ids + [task_id])
    suite_handler.add_test_by_task_id(new_task_id, retry_spec)

    suite_handler.set_max_retries(suite_handler.max_retries - 1)
    suite_handler.remove_test_by_task_id(task_id)
Xixuan Wu7a450c52018-07-20 15:07:51 -0700350
351
def _convert_dict_to_string(input_dict):
    """Convert dictionary to a string.

    Every value is converted with str(). A nested dictionary is converted
    recursively first, so it appears in the result as a JSON-encoded string
    rather than as a nested JSON object.

    @param input_dict: A dictionary. It is left unmodified.

    @return a JSON string mapping each key to its stringified value.
    """
    # Build a new dict instead of overwriting the caller's values in place.
    converted = {}
    # items() rather than iteritems() so this also runs on Python 3.
    for k, v in input_dict.items():
        if isinstance(v, dict):
            converted[k] = _convert_dict_to_string(v)
        else:
            converted[k] = str(v)

    return json.dumps(converted)
363 return json.dumps(input_dict)