# Copyright 2018 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Module for CrOS dynamic test suite generation and execution."""
6
7from __future__ import absolute_import
8from __future__ import division
9from __future__ import print_function
10
Xixuan Wu80795c82018-06-12 11:56:17 -070011import contextlib
12import itertools
Xixuan Wu0bea9522018-05-08 17:49:19 -070013import json
Xixuan Wu6c041332018-05-07 16:04:36 -070014import logging
Xixuan Wue71c8932018-05-07 17:18:34 -070015import os
Aviv Keshetf0951212019-03-18 14:54:32 -070016import re
Xixuan Wu0bea9522018-05-08 17:49:19 -070017import time
Xixuan Wu6c041332018-05-07 16:04:36 -070018
Xixuan Wue71c8932018-05-07 17:18:34 -070019from lucifer import autotest
Xixuan Wu9af95a22018-05-18 10:46:42 -070020from skylab_suite import cros_suite
Xixuan Wu0bea9522018-05-08 17:49:19 -070021from skylab_suite import swarming_lib
Xixuan Wuc7bf77c2018-04-24 12:05:40 -070022
Xixuan Wu0bea9522018-05-08 17:49:19 -070023
SKYLAB_DRONE_SWARMING_WORKER = '/opt/infra-tools/skylab_swarming_worker'
SKYLAB_SUITE_USER = 'skylab_suite_runner'
# Default location of the skylab tool; _create_test_task lets the
# SKYLAB_TOOL environment variable override it.
SKYLAB_TOOL = '/opt/infra-tools/skylab'

# Seconds slept between two polls of the child tasks while waiting.
SUITE_WAIT_SLEEP_INTERVAL_SECONDS = 30

# Test dependencies that are dropped (with a warning) before scheduling.
# See #5 in crbug.com/873886 for more details.
_NOT_SUPPORTED_DEPENDENCIES = [
    'skip_provision',
    'cleanup-reboot',
    'rpm',
    'modem_repair',
]
33
Xixuan Wue71c8932018-05-07 17:18:34 -070034
def run(client, test_specs, suite_handler, dry_run=False):
    """Start a CrOS dynamic test suite, or pick up an existing one.

    When the suite handler already carries a suite id the suite is resumed;
    otherwise a new suite is made.

    @param client: A swarming_lib.Client instance.
    @param test_specs: A list of cros_suite.TestSpec objects.
    @param suite_handler: A cros_suite.SuiteHandler object.
    @param dry_run: Whether to kick off dry runs of the tests.
    """
    assert isinstance(client, swarming_lib.Client)
    if not suite_handler.suite_id:
        # No suite id recorded, so there is nothing to resume.
        _run_suite(test_specs, suite_handler, dry_run)
        return

    _resume_suite(client, test_specs, suite_handler, dry_run)
Xixuan Wuc7430712018-07-10 12:04:34 -070050
51
def _resume_suite(client, test_specs, suite_handler, dry_run=False):
    """Resume a suite and its child tasks, identified by the handler's suite id.

    Child tasks already known to swarming are re-registered with the suite
    handler; test specs without any child task are scheduled now.

    @param client: A swarming_lib.Client instance.
    @param test_specs: A list of cros_suite.TestSpec objects.
    @param suite_handler: A cros_suite.SuiteHandler object.
    @param dry_run: Whether to kick off dry runs of the tests.
    """
    assert isinstance(client, swarming_lib.Client)
    suite_id = suite_handler.suite_id
    child_tasks = client.get_child_tasks(suite_id)
    pending_specs = _get_unscheduled_test_specs(
            test_specs, suite_handler, child_tasks)

    logging.info('Not yet scheduled test_specs: %r', pending_specs)
    _create_test_tasks(pending_specs, suite_handler, suite_id, dry_run)

    if suite_id is None or not suite_handler.should_wait():
        return

    _wait_for_results(suite_handler, dry_run=dry_run)
65
66
def _get_unscheduled_test_specs(test_specs, suite_handler, all_tasks):
    """Split test specs into already-scheduled and not-yet-scheduled ones.

    Each test spec that has at least one matching child task is registered
    with the suite handler under its unfinished task's id (or the first
    matching task's id when none is unfinished). Specs without any matching
    task are collected and returned.

    @param test_specs: A list of cros_suite.TestSpec objects.
    @param suite_handler: A cros_suite.SuiteHandler object.
    @param all_tasks: A list of child task dicts of the suite.

    @return a list of the test specs that have no child task yet.
    """
    unscheduled = []
    for spec in test_specs:
        if not suite_handler.is_provision():
            matching = [task for task in all_tasks
                        if task['name'] == spec.test.name]
        else:
            # We cannot check bot_id because pending tasks do not have it yet.
            wanted_tag = 'id:%s' % spec.bot_id
            matching = [task for task in all_tasks
                        if wanted_tag in task['tags']]

        if matching:
            running = _get_current_task(matching)
            if running:
                tracked_id = running['task_id']
            else:
                tracked_id = matching[0]['task_id']

            # Every other matching task counts as an earlier attempt.
            earlier_ids = [task['task_id'] for task in matching
                           if task['task_id'] != tracked_id]
            handler_spec = cros_suite.TestHandlerSpec(
                    test_spec=spec,
                    remaining_retries=spec.test.job_retries - len(matching),
                    previous_retried_ids=earlier_ids)
            suite_handler.add_test_by_task_id(tracked_id, handler_spec)
        else:
            unscheduled.append(spec)

    return unscheduled
95
96
def _get_current_task(tasks):
    """Find the single task that has not reached a finished state.

    @param tasks: A list of task dicts including task_id, state, etc.

    @return a dict representing the current running task, or None when
            every task is in a finished state.

    @raises ValueError: if more than one task is not finished.
    """
    running = None
    for task in tasks:
        if task['state'] in swarming_lib.TASK_FINISHED_STATUS:
            continue

        if running:
            raise ValueError(
                    'Parent task has 2 same running child tasks: %s, %s'
                    % (running['task_id'], task['task_id']))

        running = task

    return running
115
116
def _run_suite(test_specs, suite_handler, dry_run=False):
    """Make a new suite.

    The suite id is read from the SWARMING_TASK_ID environment variable
    and may therefore be None.

    @param test_specs: A list of cros_suite.TestSpec objects.
    @param suite_handler: A cros_suite.SuiteHandler object.
    @param dry_run: Whether to kick off dry runs of the tests.
    """
    suite_id = os.environ.get('SWARMING_TASK_ID')
    _create_test_tasks(test_specs, suite_handler, suite_id, dry_run)
    suite_handler.set_suite_id(suite_id)

    if suite_id is None or not suite_handler.should_wait():
        return

    _wait_for_results(suite_handler, dry_run=dry_run)
125
126
def _create_test_tasks(test_specs, suite_handler, suite_id, dry_run=False):
    """Schedule a task for every test spec and register it for monitoring.

    The TestSpec objects are scheduled on swarming one by one; each new
    task is added to the swarming_task_id-to-test map of suite_handler.

    @param test_specs: A list of cros_suite.TestSpec objects to schedule.
    @param suite_handler: A cros_suite.SuiteHandler object to monitor the
                          test_specs' progress.
    @param suite_id: A string ID for a suite task, it's the parent task id for
                     these to-be-scheduled test_specs.
    @param dry_run: Whether to kick off dry runs of the tests.
    """
    for spec in test_specs:
        new_task_id = _create_test_task(
                spec,
                suite_id=suite_id,
                is_provision=suite_handler.is_provision(),
                dry_run=dry_run)
        # The task just created consumes one of the test's job retries.
        handler_spec = cros_suite.TestHandlerSpec(
                test_spec=spec,
                remaining_retries=spec.test.job_retries - 1,
                previous_retried_ids=[])
        suite_handler.add_test_by_task_id(new_task_id, handler_spec)
Xixuan Wu56424bc2018-05-15 11:03:27 -0700153
Xixuan Wu56424bc2018-05-15 11:03:27 -0700154
def _create_test_task(test_spec, suite_id=None,
                      is_provision=False, dry_run=False):
    """Create a test task for a given test spec.

    Builds a skylab 'create-test' command line from the test spec, runs it,
    and extracts the id of the created task from the command's output.

    @param test_spec: A cros_suite.TestSpec object.
    @param suite_id: the suite task id of the test.
    @param is_provision: Not used by this function; kept so that callers
                         passing it keep working.
    @param dry_run: If true, don't actually create task.

    @return the swarming task id of this task, or None for a dry run.

    @raises ValueError: if no task id can be found in the command output.
    """
    logging.info('Creating task for test %s', test_spec.test.name)
    skylab_tool_path = os.environ.get('SKYLAB_TOOL', SKYLAB_TOOL)

    cmd = [
        skylab_tool_path, 'create-test',
        '-board', test_spec.board,
        '-image', test_spec.build,
    ]
    if test_spec.pool:
        # TODO(akeshet): Clean up this hack around pool name translation.
        autotest_pool_label = 'pool:%s' % test_spec.pool
        pool_dependency_value = swarming_lib.task_dependencies_from_labels(
                [autotest_pool_label])['label-pool']
        cmd += ['-pool', pool_dependency_value]

    if test_spec.model:
        cmd += ['-model', test_spec.model]
    if test_spec.quota_account:
        cmd += ['-qs-account', test_spec.quota_account]
    if test_spec.test.test_type.lower() == 'client':
        cmd += ['-client-test']

    tags = _compute_tags(test_spec.build, suite_id)
    dimensions = _compute_dimensions(
            test_spec.bot_id, test_spec.test.dependencies)
    keyvals_flat = _compute_job_keyvals_flat(test_spec.keyvals, suite_id)

    for tag in tags:
        cmd += ['-tag', tag]
    for keyval in keyvals_flat:
        cmd += ['-keyval', keyval]
    # The test name and the dimensions are positional arguments, so they
    # come after all flags.
    cmd += [test_spec.test.name]
    cmd += dimensions

    if dry_run:
        logging.info('Would have created task with command %s', cmd)
        return

    # TODO(akeshet): Avoid this late chromite import.
    cros_build_lib = autotest.chromite_load('cros_build_lib')
    result = cros_build_lib.RunCommand(cmd, capture_output=True)
    # TODO(akeshet): Use -json flag and json-parse output of the command instead
    # of regex matching to determine task_id.
    m = re.match('.*id=(.*)$', result.output)
    if m is None:
        # Fail with a readable message instead of an AttributeError on None.
        raise ValueError(
                'Could not find a task id in the output of %s: %r'
                % (cmd, result.output))
    task_id = m.group(1)
    logging.info('Created task with id %s', task_id)
    return task_id
Xixuan Wu3dea7cf2018-12-10 17:50:45 -0800212
Aviv Keshetf0951212019-03-18 14:54:32 -0700213
def _compute_tags(build, suite_id):
    """Compute the tags attached to a test task.

    @param build: The build the test runs against.
    @param suite_id: The parent suite task id, or None if there is none.

    @return a list of 'key:value' tag strings; the parent task tag is only
            present when suite_id is not None.
    """
    build_tag = 'build:%s' % build
    if suite_id is None:
        return [build_tag]
    return [build_tag, 'parent_task_id:%s' % suite_id]
221
222
def _compute_dimensions(bot_id, dependencies):
    """Compute the dimension arguments of a test task.

    @param bot_id: Id of the bot to pin the task to; a falsy value means
                   no bot dimension is emitted.
    @param dependencies: The test's dependency labels.

    @return a list of 'key:value' strings: the optional bot id first,
            followed by the sorted dimensions derived from the supported
            dependencies.
    """
    bot_dimension = ['id:%s' % bot_id] if bot_id else []
    supported = _filter_unsupported_dependencies(dependencies)
    swarming_deps = swarming_lib.task_dependencies_from_labels(supported)
    dep_dimensions = ['%s:%s' % (key, value)
                      for key, value in swarming_deps.items()]
    dep_dimensions.sort()
    return bot_dimension + dep_dimensions
234
235
def _compute_job_keyvals_flat(keyvals, suite_id):
    """Flatten job keyvals into a sorted list of 'key:value' strings.

    @param keyvals: A dict of job keyvals; it is copied, not modified.
    @param suite_id: The parent suite task id. When not None it is added
                     to the keyvals under the parent job id key.

    @return a sorted list of 'key:value' strings.
    """
    merged = keyvals.copy()
    if suite_id is not None:
        # TODO(akeshet): Avoid this late autotest constants import.
        constants = autotest.load('server.cros.dynamic_suite.constants')
        merged[constants.PARENT_JOB_ID] = suite_id

    flattened = ['%s:%s' % (key, value) for key, value in merged.items()]
    flattened.sort()
    return flattened
246
247
def _filter_unsupported_dependencies(dependencies):
    """Drop the dependencies Skylab does not support, warning about each.

    @param dependencies: An iterable of dependency labels.

    @return a new list holding the supported dependencies in their
            original order.
    """
    supported = []
    for dependency in dependencies:
        if dependency not in _NOT_SUPPORTED_DEPENDENCIES:
            supported.append(dependency)
            continue

        logging.warning('Dependency %s is not supported in skylab',
                        dependency)

    return supported
Xixuan Wu0bea9522018-05-08 17:49:19 -0700257
258
@contextlib.contextmanager
def disable_logging(logging_level):
    """Context manager for disabling logging of a given logging level.

    On exit the disable level that was in effect before entering is put
    back, so nested uses and earlier logging.disable() calls are not
    clobbered.

    @param logging_level: Logging calls of this level and below are
                          suppressed inside the context.
    """
    # logging.disable() stores its threshold on the root logger's manager.
    previous_level = logging.root.manager.disable
    logging.disable(logging_level)
    try:
        yield
    finally:
        logging.disable(previous_level)
267
268
def _loop_and_wait_forever(suite_handler, dry_run):
    """Poll the child tasks until the suite handler is done waiting.

    Every round handles the latest results, retries the tasks the handler
    marked for retry, and then sleeps for the polling interval.

    @param suite_handler: a cros_suite.SuiteHandler object.
    @param dry_run: Whether retried tests are dry runs.
    """
    iterations = 0
    while True:
        # Log progress every 300 seconds; INFO is muted in other rounds.
        elapsed_seconds = iterations * SUITE_WAIT_SLEEP_INTERVAL_SECONDS
        if elapsed_seconds % 300:
            muted_level = logging.INFO
        else:
            muted_level = logging.NOTSET

        with disable_logging(muted_level):
            suite_handler.handle_results(suite_handler.suite_id)
            if suite_handler.is_finished_waiting():
                return

        for task in suite_handler.retried_tasks:
            _retry_test(suite_handler, task['task_id'], dry_run=dry_run)

        time.sleep(SUITE_WAIT_SLEEP_INTERVAL_SECONDS)
        iterations += 1
Xixuan Wu80795c82018-06-12 11:56:17 -0700283
284
def _wait_for_results(suite_handler, dry_run=False):
    """Wait for child tasks to finish, bounded by the suite's remaining time.

    The time budget is the handler's timeout minus the minutes that already
    passed. Running out of time is logged as an error and is not raised to
    the caller.

    @param suite_handler: a cros_suite.SuiteHandler object.
    @param dry_run: Whether retried tests are dry runs.
    """
    timeout_util = autotest.chromite_load('timeout_util')
    try:
        remaining_seconds = (suite_handler.timeout_mins * 60 -
                             suite_handler.passed_mins * 60)
        with timeout_util.Timeout(remaining_seconds):
            _loop_and_wait_forever(suite_handler, dry_run)
    except timeout_util.TimeoutError:
        logging.error('Timeout in waiting for child tasks.')
    else:
        logging.info('Finished to wait for child tasks.')
Xixuan Wu56424bc2018-05-15 11:03:27 -0700300
301
def _retry_test(suite_handler, task_id, dry_run=False):
    """Retry a test of a suite.

    The retry consists of these steps:
        1. Schedule the test again.
        2. Register the new swarming task id with the suite handler, with
           one fewer remaining retry and the old task id appended to the
           previously retried ids.
        3. Lower the suite-level max retries by 1.
        4. Remove the previous task from the suite handler, since the
           suite no longer actively monitors it.

    @param suite_handler: a cros_suite.SuiteHandler object.
    @param task_id: The swarming task id for the retried test.
    @param dry_run: Whether to retry a dry run of the test.
    """
    failed_spec = suite_handler.get_test_by_task_id(task_id)
    retries_left = failed_spec.remaining_retries - 1
    logging.info('Retrying test %s, remaining %d retries.',
                 failed_spec.test_spec.test.name,
                 retries_left)

    new_task_id = _create_test_task(
            failed_spec.test_spec,
            suite_id=suite_handler.suite_id,
            is_provision=suite_handler.is_provision(),
            dry_run=dry_run)

    retry_spec = cros_suite.TestHandlerSpec(
            test_spec=failed_spec.test_spec,
            remaining_retries=retries_left,
            previous_retried_ids=failed_spec.previous_retried_ids + [task_id])
    suite_handler.add_test_by_task_id(new_task_id, retry_spec)

    suite_handler.set_max_retries(suite_handler.max_retries - 1)
    suite_handler.remove_test_by_task_id(task_id)
Xixuan Wu7a450c52018-07-20 15:07:51 -0700335
336
def _convert_dict_to_string(input_dict):
    """Convert dictionary to a JSON string whose values are all strings.

    Non-dict values are passed through str(). Nested dictionaries are
    converted recursively, so each of them ends up as a JSON-encoded string
    value rather than as a nested JSON object. The input dictionary and its
    nested dictionaries are left unmodified.

    @param input_dict: A dictionary.

    @return the JSON string for the converted dictionary.
    """
    # Build a fresh dict rather than overwriting the caller's values, and
    # use items(), which works on both Python 2 and Python 3.
    stringified = {}
    for k, v in input_dict.items():
        if isinstance(v, dict):
            stringified[k] = _convert_dict_to_string(v)
        else:
            stringified[k] = str(v)

    return json.dumps(stringified)
348 return json.dumps(input_dict)