blob: da4cf45fd5a27207887aabd324ff58705dc71673 [file] [log] [blame]
Xixuan Wuc7bf77c2018-04-24 12:05:40 -07001# Copyright 2018 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5"""Definition of a CrOS suite in skylab.
6
7This file is a simplification of dynamic_suite.suite without any useless
8features for skylab suite.
9
10Suite class in this file mainly has 2 features:
11 1. Integrate parameters from control file & passed in arguments.
12 2. Find proper child tests for a given suite.
13
14Use case:
15 See _run_suite() in skylab_suite.run_suite_skylab.
16"""
17
18from __future__ import absolute_import
19from __future__ import division
20from __future__ import print_function
21
Xixuan Wu7cc10e52018-04-25 17:04:51 -070022import collections
Xixuan Wu2406be32018-05-14 13:51:30 -070023import logging
Xixuan Wu7cc10e52018-04-25 17:04:51 -070024
Xixuan Wu48c45b92018-04-26 11:09:35 -070025from lucifer import autotest
Xixuan Wu2406be32018-05-14 13:51:30 -070026from skylab_suite import swarming_lib
Xixuan Wu48c45b92018-04-26 11:09:35 -070027
Xixuan Wu7cc10e52018-04-25 17:04:51 -070028
# Immutable description of one suite run; consumed by Suite and
# ProvisionSuite below.
SuiteSpec = collections.namedtuple(
    'SuiteSpec',
    (
        # Dict-like collection of builds keyed by provision prefix.
        'builds',
        # Name used to look up the child tests of the suite.
        'suite_name',
        # Name used to fetch the suite's control file.
        'suite_file_name',
        # Build whose artifacts and control files are staged.
        'test_source_build',
        # Extra suite arguments; ProvisionSuite reads 'num_required' from it.
        'suite_args',
        # priority/board/pool are copied verbatim into every TestSpec.
        'priority',
        'board',
        'pool',
        # Keyvals of the job; the inherited ones end up in the suite keyvals.
        'job_keyvals',
    ))
42
# Immutable bundle of the settings that SuiteHandler.__init__ reads.
SuiteHandlerSpec = collections.namedtuple(
    'SuiteHandlerSpec',
    (
        # Suite name; 'provision' marks a provision suite.
        'suite_name',
        # Whether to wait for the suite's result.
        'wait',
        # Swarming task id of the suite.
        'suite_id',
        # Timeout of the suite, in minutes.
        'timeout_mins',
        # Whether test-level retry is enabled for the suite.
        'test_retry',
        # Suite-level maximum number of retries.
        'max_retries',
        # Minimum number of successfully provisioned DUTs required.
        'provision_num_required',
    ))
54
# Per-child-test bookkeeping that SuiteHandler stores for each swarming task.
TestHandlerSpec = collections.namedtuple(
    'TestHandlerSpec',
    (
        # The TestSpec of the child test.
        'test_spec',
        # Retries still allowed; the test is retried only while this is > 0.
        'remaining_retries',
        # Presumably the task ids of earlier attempts of this test; not read
        # in this file -- confirm against the caller.
        'previous_retried_ids',
    ))
62
# Everything needed to describe one child test of a suite; built by
# Suite._get_test_specs().
TestSpec = collections.namedtuple(
    'TestSpec',
    (
        # The child test's control data.
        'test',
        # priority/board/pool are inherited from the parent suite.
        'priority',
        'board',
        'pool',
        # Build the test runs against (the suite's test_source_build).
        'build',
        # Suite-level keyvals attached to the test.
        'keyvals',
        # Swarming bot assigned to the test; '' when none is assigned.
        'bot_id',
        # DUT name of the assigned bot; '' when none is assigned.
        'dut_name',
        # Swarming expiration and timeout settings, in seconds.
        'expiration_secs',
        'grace_period_secs',
        'execution_timeout_secs',
        'io_timeout_secs',
    ))
79
Xixuan Wu7cc10e52018-04-25 17:04:51 -070080
class NonValidPropertyError(Exception):
    """Error signalling that a suite property was read in an invalid state.

    Raised by Suite.ds when the devserver has not been set up yet.
    """
83
84
class SuiteHandler(object):
    """The class for handling a CrOS suite run.

    Its responsibility includes handling retries for child tests.
    """

    def __init__(self, specs):
        """Initialize a suite handler.

        @param specs: A SuiteHandlerSpec object.
        """
        self._suite_name = specs.suite_name
        self._wait = specs.wait
        self._timeout_mins = specs.timeout_mins
        self._provision_num_required = specs.provision_num_required
        self._test_retry = specs.test_retry
        self._max_retries = specs.max_retries

        self._suite_id = specs.suite_id
        # Maps the swarming task id of a child test to its TestHandlerSpec.
        self._task_to_test_maps = {}
        self.successfully_provisioned_duts = set()

        # It only maintains the swarming task of the final run of each
        # child task, i.e. it doesn't include failed swarming tasks of
        # each child task which will get retried later.
        self._active_child_tasks = []

        # Child tasks picked for retry by the latest handle_results() call.
        # Initialized here so that is_finished_waiting() can be called
        # before the first handle_results() without an AttributeError.
        self.retried_tasks = []

    def should_wait(self):
        """Return whether to wait for a suite's result."""
        return self._wait

    def is_provision(self):
        """Return whether the suite handler is for provision suite."""
        return self._suite_name == 'provision'

    def set_suite_id(self, suite_id):
        """Set swarming task id for a suite.

        @param suite_id: The swarming task id of this suite.
        """
        self._suite_id = suite_id

    def add_test_by_task_id(self, task_id, test_handler_spec):
        """Record a child test and its swarming task id.

        @param task_id: the swarming task id of a child test.
        @param test_handler_spec: a TestHandlerSpec object.
        """
        self._task_to_test_maps[task_id] = test_handler_spec

    def get_test_by_task_id(self, task_id):
        """Get a child test by its swarming task id.

        @param task_id: the swarming task id of a child test.

        @return the TestHandlerSpec recorded for the task id.

        @raises KeyError: if the task id has not been recorded.
        """
        return self._task_to_test_maps[task_id]

    def remove_test_by_task_id(self, task_id):
        """Delete a child test by its swarming task id.

        Removing an unknown task id is a no-op.

        @param task_id: the swarming task id of a child test.
        """
        self._task_to_test_maps.pop(task_id, None)

    def set_max_retries(self, max_retries):
        """Set the max retries for a suite.

        @param max_retries: The current maximum retries to set.
        """
        self._max_retries = max_retries

    @property
    def timeout_mins(self):
        """Get the timeout minutes of a suite."""
        return self._timeout_mins

    @property
    def suite_id(self):
        """Get the swarming task id of a suite."""
        return self._suite_id

    @property
    def max_retries(self):
        """Get the max num of retries of a suite."""
        return self._max_retries

    @property
    def active_child_tasks(self):
        """Get the child tasks which is actively monitored by a suite.

        The active child tasks list includes tasks which are currently running
        or finished without following retries. E.g.
        Suite task X:
            child task 1: x1 (first try x1_1, second try x1_2)
            child task 2: x2 (first try: x2_1)
        The final active child task list will include task x1_2 and x2_1, won't
        include x1_1 since it's a task which is finished but get retried later.
        """
        return self._active_child_tasks

    def handle_results(self, all_tasks):
        """Handle child tasks' results.

        Refreshes the list of active child tasks (those whose task id is
        currently recorded by this handler) and the list of tasks which
        should be retried.

        @param all_tasks: A list of json task results, each of which has at
                          least the keys 'task_id', 'state' and 'failure'.
        """
        self._active_child_tasks = [t for t in all_tasks if t['task_id'] in
                                    self._task_to_test_maps]
        self.retried_tasks = [t for t in all_tasks if self._should_retry(t)]
        logging.info('Found %d tests to be retried.', len(self.retried_tasks))

    def _check_all_tasks_finished(self):
        """Check whether all tasks are finished, including retried tasks.

        @return True if every active child task is in a finished state and
                no task is waiting to be retried.
        """
        finished_tasks = [t for t in self._active_child_tasks if
                          t['state'] in swarming_lib.TASK_FINISHED_STATUS]
        logging.info('%d/%d child tasks finished, %d got retried.',
                     len(finished_tasks), len(self._active_child_tasks),
                     len(self.retried_tasks))
        return (len(finished_tasks) == len(self._active_child_tasks)
                and not self.retried_tasks)

    def _set_successful_provisioned_duts(self):
        """Set successfully provisioned duts.

        Adds the dut name of every successfully completed active child task
        to self.successfully_provisioned_duts; empty dut names are skipped.
        """
        for t in self._active_child_tasks:
            if (swarming_lib.get_task_final_state(t) ==
                swarming_lib.TASK_COMPLETED_SUCCESS):
                dut_name = self.get_test_by_task_id(
                        t['task_id']).test_spec.dut_name
                if dut_name:
                    self.successfully_provisioned_duts.add(dut_name)

    def is_provision_successfully_finished(self):
        """Check whether provision succeeds.

        @return True if the number of successfully provisioned duts reaches
                the required minimum.
        """
        logging.info('Found %d successfully provisioned duts, '
                     'the minimum requirement is %d',
                     len(self.successfully_provisioned_duts),
                     self._provision_num_required)
        return (len(self.successfully_provisioned_duts) >=
                self._provision_num_required)

    def is_finished_waiting(self):
        """Check whether the suite should finish its waiting.

        A provision suite stops waiting as soon as enough duts are
        provisioned, even if some child tasks are still running.

        @return True if the suite doesn't need to wait any longer.
        """
        if self.is_provision():
            self._set_successful_provisioned_duts()
            return (self.is_provision_successfully_finished() or
                    self._check_all_tasks_finished())

        return self._check_all_tasks_finished()

    def _should_retry(self, test_result):
        """Check whether a test should be retried.

        We will retry a test if:
            1. The test-level retry is enabled for this suite.
            2. The test fails.
            3. The test is currently monitored by the suite, i.e.
               it's not a previous retried test.
            4. The test has remaining retries based on JOB_RETRIES in
               its control file.
            5. The suite-level max retries isn't hit.

        @param test_result: A json test result from swarming API.

        @return True if we should retry the test.
        """
        task_id = test_result['task_id']
        state = test_result['state']
        is_failure = test_result['failure']
        # bool() keeps the documented True/False contract even when one of
        # the operands is a non-bool falsy value (e.g. None).
        return bool(
                self._test_retry and
                ((state == swarming_lib.TASK_COMPLETED and is_failure)
                 or (state in swarming_lib.TASK_STATUS_TO_RETRY))
                and (task_id in self._task_to_test_maps)
                and (self._task_to_test_maps[task_id].remaining_retries > 0)
                and (self._max_retries > 0))
Xixuan Wu2406be32018-05-14 13:51:30 -0700251
252
class Suite(object):
    """The class for a CrOS suite."""
    EXPIRATION_SECS = swarming_lib.DEFAULT_EXPIRATION_SECS

    def __init__(self, spec):
        """Initialize a suite.

        @param spec: A SuiteSpec object.
        """
        self._ds = None

        self.control_file = ''
        self.test_specs = []
        self.builds = spec.builds
        self.test_source_build = spec.test_source_build
        self.suite_name = spec.suite_name
        self.suite_file_name = spec.suite_file_name
        self.priority = spec.priority
        self.board = spec.board
        self.pool = spec.pool
        self.job_keyvals = spec.job_keyvals

    @property
    def ds(self):
        """Getter for private |self._ds| property.

        This ensures that once self.ds is called, there's a devserver ready
        for it.

        @raises NonValidPropertyError: if the suite artifacts have not been
                staged yet, i.e. no devserver is available.
        """
        if self._ds is None:
            raise NonValidPropertyError(
                    'Property self.ds is None. Please call stage_suite_artifacts() '
                    'before calling it.')

        return self._ds

    def _get_cros_build(self):
        """Get the CrOS build of this suite.

        @return the build keyed by provision.CROS_VERSION_PREFIX in
                self.builds if there is one, otherwise the first value of
                self.builds.

        @raises IndexError: if self.builds is empty.
        """
        provision = autotest.load('server.cros.provision')
        if provision.CROS_VERSION_PREFIX in self.builds:
            return self.builds[provision.CROS_VERSION_PREFIX]

        # list() is required because dict.values() isn't indexable in
        # python 3.
        return list(self.builds.values())[0]

    def _create_suite_keyvals(self):
        """Create the keyvals shared by all child tests of this suite.

        @return a dict of keyvals, containing the build and suite
                information, the firmware builds if any, and the job keyvals
                which are listed in constants.INHERITED_KEYVALS.
        """
        constants = autotest.load('server.cros.dynamic_suite.constants')
        provision = autotest.load('server.cros.provision')
        cros_build = self._get_cros_build()
        keyvals = {
                constants.JOB_BUILD_KEY: cros_build,
                constants.JOB_SUITE_KEY: self.suite_name,
                constants.JOB_BUILDS_KEY: self.builds
        }
        if (cros_build != self.test_source_build or
            len(self.builds) > 1):
            keyvals[constants.JOB_TEST_SOURCE_BUILD_KEY] = (
                    self.test_source_build)
            # items() instead of iteritems() so that it works in both
            # python 2 and python 3.
            for prefix, build in self.builds.items():
                if prefix == provision.FW_RW_VERSION_PREFIX:
                    keyvals[constants.FWRW_BUILD] = build
                elif prefix == provision.FW_RO_VERSION_PREFIX:
                    keyvals[constants.FWRO_BUILD] = build

        for key in self.job_keyvals:
            if key in constants.INHERITED_KEYVALS:
                keyvals[key] = self.job_keyvals[key]

        return keyvals

    def prepare(self):
        """Prepare a suite job for execution.

        Stages the suite artifacts, fetches the suite control file and
        fills self.test_specs with the TestSpec of every child test.
        """
        self._stage_suite_artifacts()
        self._parse_suite_args()
        keyvals = self._create_suite_keyvals()
        available_bots = self._get_available_bots()
        tests = self._find_tests(available_bots_num=len(available_bots))
        self.test_specs = self._get_test_specs(tests, available_bots, keyvals)

    def _get_test_specs(self, tests, available_bots, keyvals):
        """Build the TestSpec objects for the child tests.

        The i-th test is assigned to the i-th available bot. Tests beyond
        the number of available bots get an empty bot_id and dut_name.

        @param tests: A list of child tests.
        @param available_bots: A list of bot dicts, each of which has the
                               keys 'bot_id' and 'dimensions'.
        @param keyvals: The suite keyvals to attach to every child test.

        @return a list of TestSpec objects, one for each test.
        """
        test_specs = []
        for idx, test in enumerate(tests):
            if idx < len(available_bots):
                bot_id = available_bots[idx]['bot_id']
                dut_name = swarming_lib.get_task_dut_name(
                        available_bots[idx]['dimensions'])
            else:
                bot_id = ''
                dut_name = ''
            test_specs.append(TestSpec(
                    test=test,
                    priority=self.priority,
                    board=self.board,
                    pool=self.pool,
                    build=self.test_source_build,
                    bot_id=bot_id,
                    dut_name=dut_name,
                    keyvals=keyvals,
                    expiration_secs=self.EXPIRATION_SECS,
                    grace_period_secs=swarming_lib.DEFAULT_TIMEOUT_SECS,
                    execution_timeout_secs=swarming_lib.DEFAULT_TIMEOUT_SECS,
                    io_timeout_secs=swarming_lib.DEFAULT_TIMEOUT_SECS))

        return test_specs

    def _stage_suite_artifacts(self):
        """Stage suite control files and suite-to-tests mapping file.

        The artifacts of self.test_source_build are staged, and the
        devserver used for staging is kept so that self.ds becomes valid.
        """
        suite_common = autotest.load('server.cros.dynamic_suite.suite_common')
        ds, _ = suite_common.stage_build_artifacts(self.test_source_build)
        self._ds = ds

    def _parse_suite_args(self):
        """Fetch the suite control file and keep it in self.control_file.

        The control file named self.suite_file_name is fetched for
        self.test_source_build from the devserver, so the suite artifacts
        must be staged beforehand.
        """
        suite_common = autotest.load('server.cros.dynamic_suite.suite_common')
        self.control_file = suite_common.get_control_file_by_build(
                self.test_source_build, self.ds, self.suite_file_name)

    def _find_tests(self, available_bots_num=0):
        """Fetch the child tests.

        @param available_bots_num: The number of available bots. It's not
                used for normal suites; it's accepted so that subclasses can
                override this method with the same signature.

        @return the filtered child tests of the suite.
        """
        control_file_getter = autotest.load(
                'server.cros.dynamic_suite.control_file_getter')
        suite_common = autotest.load('server.cros.dynamic_suite.suite_common')
        cf_getter = control_file_getter.DevServerGetter(
                self.test_source_build, self.ds)
        tests = suite_common.retrieve_for_suite(
                cf_getter, self.suite_name)
        return suite_common.filter_tests(tests)

    def _get_available_bots(self):
        """Get available bots for normal suites.

        @return an empty list, since normal suites don't assign bots.
        """
        return []
388
Xixuan Wu2406be32018-05-14 13:51:30 -0700389
class ProvisionSuite(Suite):
    """A CrOS suite whose only purpose is to provision DUTs."""
    EXPIRATION_SECS = swarming_lib.DEFAULT_EXPIRATION_SECS

    def __init__(self, spec):
        """Initialize a provision suite.

        @param spec: A SuiteSpec object whose suite_args contains the key
                     'num_required'.
        """
        super(ProvisionSuite, self).__init__(spec)
        self._num_required = spec.suite_args['num_required']

    def _find_tests(self, available_bots_num=0):
        """Fetch the child tests for provision suite.

        @param available_bots_num: The number of available bots.

        @return a list which repeats the control data of dummy_Pass; its
                length is the larger one of the required number of DUTs and
                the number of available bots.
        """
        getter_module = autotest.load(
                'server.cros.dynamic_suite.control_file_getter')
        suite_common = autotest.load('server.cros.dynamic_suite.suite_common')
        getter = getter_module.DevServerGetter(self.test_source_build, self.ds)
        dummy_test = suite_common.retrieve_control_data_for_test(
                getter, 'dummy_Pass')
        logging.info('Get %d available DUTs for provision.', available_bots_num)
        test_count = max(self._num_required, available_bots_num)
        return [dummy_test] * test_count

    def _get_available_bots(self):
        """Get available bots for provision suites.

        @return the bots matching the pool and board of this suite which
                are reported as available by swarming_lib.
        """
        dimensions = {
                'pool': swarming_lib.SKYLAB_DRONE_POOL,
                'label-pool': swarming_lib.SWARMING_DUT_POOL_MAP.get(self.pool),
                'label-board': self.board,
        }
        candidates = swarming_lib.query_bots_list(dimensions)
        return [b for b in candidates if swarming_lib.bot_available(b)]
Xixuan Wu0c01b092018-06-13 14:12:55 -0700416 return [bot for bot in bots if swarming_lib.bot_available(bot)]