blob: bc0eedf22f5ddf46922fde51173cb776a1c24d61 [file] [log] [blame]
Xixuan Wuc7bf77c2018-04-24 12:05:40 -07001# Copyright 2018 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5"""Definition of a CrOS suite in skylab.
6
This file is a simplification of dynamic_suite.suite without any useless
features for skylab suite.
9
10Suite class in this file mainly has 2 features:
11 1. Integrate parameters from control file & passed in arguments.
12 2. Find proper child tests for a given suite.
13
14Use case:
15 See _run_suite() in skylab_suite.run_suite_skylab.
16"""
17
18from __future__ import absolute_import
19from __future__ import division
20from __future__ import print_function
21
Xixuan Wu7cc10e52018-04-25 17:04:51 -070022import collections
Xixuan Wu2406be32018-05-14 13:51:30 -070023import logging
Xixuan Wu7cc10e52018-04-25 17:04:51 -070024
Xixuan Wu48c45b92018-04-26 11:09:35 -070025from lucifer import autotest
Xixuan Wu2406be32018-05-14 13:51:30 -070026from skylab_suite import swarming_lib
Xixuan Wu48c45b92018-04-26 11:09:35 -070027
Xixuan Wu7cc10e52018-04-25 17:04:51 -070028
# Static description of a suite. Suite.__init__ reads builds,
# test_source_build, suite_name, suite_file_name, priority, board and pool;
# ProvisionSuite.__init__ additionally reads suite_args['num_required'].
SuiteSpecs = collections.namedtuple(
    'SuiteSpecs',
    (
        'builds',
        'suite_name',
        'suite_file_name',
        'test_source_build',
        'suite_args',
        'priority',
        'board',
        'pool',
    ))
41
# Settings consumed by SuiteHandler.__init__: the suite timeout, whether
# test-level retry is enabled, the suite-level retry budget and the number
# of DUTs a provision suite requires.
SuiteHandlerSpecs = collections.namedtuple(
    'SuiteHandlerSpecs',
    (
        'timeout_mins',
        'test_retry',
        'max_retries',
        'provision_num_required',
    ))
50
# Per-child-test bookkeeping that SuiteHandler stores against a swarming
# task id; SuiteHandler._should_retry() reads remaining_retries from it.
TestHandlerSpecs = collections.namedtuple(
    'TestHandlerSpecs',
    (
        'test_specs',
        'remaining_retries',
        'previous_retried_ids',
    ))
58
# Everything needed to describe one child test; Suite._get_test_specs()
# builds one of these for each child test it finds.
TestSpecs = collections.namedtuple(
    'TestSpecs',
    (
        'test',
        'priority',
        'board',
        'pool',
        'build',
        'bot_id',
        'expiration_secs',
        'grace_period_secs',
        'execution_timeout_secs',
        'io_timeout_secs',
    ))
73
Xixuan Wu7cc10e52018-04-25 17:04:51 -070074
class NonValidPropertyError(Exception):
    """Error signalling that a suite property holds no valid value yet."""
77
78
class SuiteHandler(object):
    """The class for handling a CrOS suite run.

    Its responsibility includes handling retries for child tests.
    """

    def __init__(self, specs):
        """Initialize a suite handler.

        @param specs: A SuiteHandlerSpecs object.
        """
        self._wait = True
        self._timeout_mins = specs.timeout_mins
        self._provision_num_required = specs.provision_num_required
        self._test_retry = specs.test_retry
        self._max_retries = specs.max_retries

        self._suite_id = None
        self._task_to_test_maps = {}
        self.successfully_provisioned_duts = set()

        # It only maintains the swarming task of the final run of each
        # child task, i.e. it doesn't include failed swarming tasks of
        # each child task which will get retried later.
        self._active_child_tasks = []

        # Tasks picked for retry by the most recent handle_results() call.
        # It must exist from construction on: is_finished_waiting() reads
        # it and may be called before any result has been handled.
        self.retried_tasks = []

    def should_wait(self):
        """Return whether to wait for a suite's result."""
        return self._wait

    def is_provision(self):
        """Return whether the suite handler is for provision suite."""
        return self._provision_num_required > 0

    def set_suite_id(self, suite_id):
        """Set swarming task id for a suite.

        @param suite_id: The swarming task id of this suite.
        """
        self._suite_id = suite_id

    def add_test_by_task_id(self, task_id, test_handler_specs):
        """Record a child test and its swarming task id.

        @param task_id: the swarming task id of a child test.
        @param test_handler_specs: a TestHandlerSpecs object.
        """
        self._task_to_test_maps[task_id] = test_handler_specs

    def get_test_by_task_id(self, task_id):
        """Get a child test by its swarming task id.

        @param task_id: the swarming task id of a child test.

        @return the object recorded for task_id by add_test_by_task_id().

        @raises KeyError if task_id is not recorded.
        """
        return self._task_to_test_maps[task_id]

    def remove_test_by_task_id(self, task_id):
        """Delete a child test by its swarming task id.

        Unknown task ids are ignored.

        @param task_id: the swarming task id of a child test.
        """
        self._task_to_test_maps.pop(task_id, None)

    def set_max_retries(self, max_retries):
        """Set the max retries for a suite.

        @param max_retries: The current maximum retries to set.
        """
        self._max_retries = max_retries

    @property
    def timeout_mins(self):
        """Get the timeout minutes of a suite."""
        return self._timeout_mins

    @property
    def suite_id(self):
        """Get the swarming task id of a suite."""
        return self._suite_id

    @property
    def max_retries(self):
        """Get the max num of retries of a suite."""
        return self._max_retries

    @property
    def active_child_tasks(self):
        """Get the child tasks which is actively monitored by a suite.

        The active child tasks list includes tasks which are currently running
        or finished without following retries. E.g.
        Suite task X:
            child task 1: x1 (first try x1_1, second try x1_2)
            child task 2: x2 (first try: x2_1)
        The final active child task list will include task x1_2 and x2_1, won't
        include x1_1 since it's a task which is finished but get retried later.
        """
        return self._active_child_tasks

    def handle_results(self, all_tasks):
        """Handle child tasks' results.

        Refreshes the list of active child tasks (those whose task id is
        currently recorded in this handler) and the list of tasks that
        should be retried.

        @param all_tasks: A list of task result dicts; each one is indexed
                by 'task_id' here and by 'state' and 'failure' when retry
                eligibility is evaluated.
        """
        self._active_child_tasks = [t for t in all_tasks if t['task_id'] in
                                    self._task_to_test_maps]
        self.retried_tasks = [t for t in all_tasks if self._should_retry(t)]
        logging.info('Found %d tests to be retried.', len(self.retried_tasks))

    def _check_all_tasks_finished(self):
        """Check whether all tasks are finished, including retried tasks.

        @return True if every active child task is in a finished state and
                no task is pending a retry.
        """
        finished_tasks = [t for t in self._active_child_tasks if
                          t['state'] in swarming_lib.TASK_FINISHED_STATUS]
        logging.info('%d/%d child tasks finished, %d got retried.',
                     len(finished_tasks), len(self._active_child_tasks),
                     len(self.retried_tasks))
        return (len(finished_tasks) == len(self._active_child_tasks)
                and not self.retried_tasks)

    def _set_successful_provisioned_duts(self):
        """Set successfully provisioned duts.

        Adds the DUT name of every successfully completed active child task
        to self.successfully_provisioned_duts; tasks without a DUT name are
        skipped.
        """
        for t in self._active_child_tasks:
            if (swarming_lib.get_task_final_state(t) ==
                    swarming_lib.TASK_COMPLETED_SUCCESS):
                dut_name = swarming_lib.get_task_dut_name(t)
                if dut_name is not None:
                    self.successfully_provisioned_duts.add(dut_name)

    def is_provision_successfully_finished(self):
        """Check whether provision succeeds.

        @return True if at least the required number of DUTs has been
                successfully provisioned.
        """
        logging.info('Found %d successfully provisioned duts, '
                     'the minimum requirement is %d',
                     len(self.successfully_provisioned_duts),
                     self._provision_num_required)
        return (len(self.successfully_provisioned_duts) >=
                self._provision_num_required)

    def is_finished_waiting(self):
        """Check whether the suite should finish its waiting.

        A provision suite stops waiting as soon as enough DUTs are
        provisioned, or when all of its tasks are finished. Any other suite
        stops waiting only when all of its tasks are finished.
        """
        if self.is_provision():
            self._set_successful_provisioned_duts()
            return (self.is_provision_successfully_finished() or
                    self._check_all_tasks_finished())

        return self._check_all_tasks_finished()

    def _should_retry(self, test_result):
        """Check whether a test should be retried.

        We will retry a test if:
            1. The test-level retry is enabled for this suite.
            2. The test fails.
            3. The test is currently monitored by the suite, i.e.
               it's not a previous retried test.
            4. The test has remaining retries based on JOB_RETRIES in
               its control file.
            5. The suite-level max retries isn't hit.

        @param test_result: A json test result from swarming API.

        @return True if we should retry the test.
        """
        task_id = test_result['task_id']
        state = test_result['state']
        is_failure = test_result['failure']
        return (self._test_retry and
                ((state == swarming_lib.TASK_COMPLETED and is_failure)
                 or (state in swarming_lib.TASK_FAILED_STATUS))
                and (task_id in self._task_to_test_maps)
                and (self._task_to_test_maps[task_id].remaining_retries > 0)
                and (self._max_retries > 0))
Xixuan Wu2406be32018-05-14 13:51:30 -0700243
244
class Suite(object):
    """A CrOS suite: its parameters plus the child tests it will run."""

    def __init__(self, specs):
        """Set up a suite from its specs.

        @param specs: A SuiteSpecs object.
        """
        # Values copied straight from the specs.
        self.builds = specs.builds
        self.test_source_build = specs.test_source_build
        self.suite_name = specs.suite_name
        self.suite_file_name = specs.suite_file_name
        self.priority = specs.priority
        self.board = specs.board
        self.pool = specs.pool

        # Values filled in later by prepare().
        self._ds = None
        self.control_file = ''
        self.tests_specs = []

    @property
    def ds(self):
        """Return the object stored by _stage_suite_artifacts().

        Accessing this before anything has been staged is an error, so a
        caller that gets a value back knows staging has happened.

        @raises NonValidPropertyError if nothing has been staged yet.
        """
        if self._ds is not None:
            return self._ds

        raise NonValidPropertyError(
            'Property self.ds is None. Please call stage_suite_artifacts() '
            'before calling it.')

    def prepare(self):
        """Prepare a suite job for execution.

        Stages the suite artifacts, loads the suite control file, then
        works out the child tests and their specs.
        """
        self._stage_suite_artifacts()
        self._parse_suite_args()
        bots = self._get_available_bots()
        child_tests = self._find_tests(available_bots_num=len(bots))
        self.tests_specs = self._get_test_specs(child_tests, bots)

    def _get_test_specs(self, tests, available_bots):
        """Build a TestSpecs object for every child test.

        The i-th test is paired with the 'bot_id' of the i-th available
        bot; tests beyond the number of available bots get an empty bot id.

        @param tests: The child tests of this suite.
        @param available_bots: A list of bot dicts, each with a 'bot_id' key.

        @return a list of TestSpecs objects, one per test, in order.
        """
        bot_count = len(available_bots)
        specs_list = []
        for position, child_test in enumerate(tests):
            if position < bot_count:
                assigned_bot = available_bots[position]['bot_id']
            else:
                assigned_bot = ''

            specs_list.append(TestSpecs(
                test=child_test,
                priority=self.priority,
                board=self.board,
                pool=self.pool,
                build=self.test_source_build,
                bot_id=assigned_bot,
                expiration_secs=swarming_lib.DEFAULT_EXPIRATION_SECS,
                grace_period_secs=swarming_lib.DEFAULT_TIMEOUT_SECS,
                execution_timeout_secs=swarming_lib.DEFAULT_TIMEOUT_SECS,
                io_timeout_secs=swarming_lib.DEFAULT_TIMEOUT_SECS))

        return specs_list

    def _stage_suite_artifacts(self):
        """Stage suite control files and suite-to-tests mapping file.

        Artifacts are staged for self.test_source_build; the first value
        returned by the staging call is kept and exposed through self.ds.
        """
        suite_common = autotest.load('server.cros.dynamic_suite.suite_common')
        self._ds, _ = suite_common.stage_build_artifacts(
            self.test_source_build)

    def _parse_suite_args(self):
        """Load the suite control file into self.control_file.

        The suite args includes:
            a. suite args in suite control file.
            b. passed-in suite args by user.
        """
        suite_common = autotest.load('server.cros.dynamic_suite.suite_common')
        self.control_file = suite_common.get_control_file_by_build(
            self.test_source_build, self.ds, self.suite_file_name)

    def _find_tests(self, available_bots_num=0):
        """Fetch the child tests.

        @param available_bots_num: Not used by this implementation.

        @return the tests retrieved for self.suite_name, after filtering.
        """
        control_file_getter = autotest.load(
            'server.cros.dynamic_suite.control_file_getter')
        suite_common = autotest.load('server.cros.dynamic_suite.suite_common')
        getter = control_file_getter.DevServerGetter(
            self.test_source_build, self.ds)
        candidates = suite_common.retrieve_for_suite(getter, self.suite_name)
        return suite_common.filter_tests(candidates)

    def _get_available_bots(self):
        """Get available bots for normal suites.

        @return an empty list.
        """
        return []
340
Xixuan Wu2406be32018-05-14 13:51:30 -0700341
class ProvisionSuite(Suite):
    """A CrOS provision suite, made of copies of the 'dummy_Pass' test."""

    def __init__(self, specs):
        """Set up a provision suite.

        @param specs: A SuiteSpecs object whose suite_args has a
                'num_required' entry.
        """
        super(ProvisionSuite, self).__init__(specs)
        self._num_required = specs.suite_args['num_required']

    def _find_tests(self, available_bots_num=0):
        """Fetch the child tests for provision suite.

        @param available_bots_num: The number of bots currently available.

        @return a list repeating the 'dummy_Pass' control data; its length
                is the larger of the required number and available_bots_num.
        """
        control_file_getter = autotest.load(
            'server.cros.dynamic_suite.control_file_getter')
        suite_common = autotest.load('server.cros.dynamic_suite.suite_common')
        getter = control_file_getter.DevServerGetter(
            self.test_source_build, self.ds)
        placeholder = suite_common.retrieve_control_data_for_test(
            getter, 'dummy_Pass')
        logging.info('Get %d available DUTs for provision.', available_bots_num)
        copies = max(self._num_required, available_bots_num)
        return [placeholder] * copies

    def _get_available_bots(self):
        """Get available bots for provision suites.

        Queries the bots matching this suite's pool and board and keeps
        those that swarming_lib.bot_available() accepts.
        """
        dimensions = {
            'pool': swarming_lib.SKYLAB_DRONE_POOL,
            'label-pool': swarming_lib.SWARMING_DUT_POOL_MAP.get(self.pool),
            'label-board': self.board,
        }
        usable = []
        for candidate in swarming_lib.query_bots_list(dimensions):
            if swarming_lib.bot_available(candidate):
                usable.append(candidate)
        return usable
Xixuan Wu0c01b092018-06-13 14:12:55 -0700367 return [bot for bot in bots if swarming_lib.bot_available(bot)]