blob: fbca31578e0daa6ee8bcd196fb265e9c51638150 [file] [log] [blame]
# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
4
Allen Liee36ab82017-07-07 15:46:40 -07005import abc
Fang Deng443f1952015-01-02 14:51:49 -08006import datetime
7import difflib
Allen Li98a26a42017-02-28 18:43:24 -08008import functools
Fang Deng443f1952015-01-02 14:51:49 -08009import hashlib
10import logging
11import operator
12import os
13import re
Fang Deng443f1952015-01-02 14:51:49 -080014import sys
Allen Li98a26a42017-02-28 18:43:24 -080015import warnings
Chris Masone44e4d6c2012-08-15 14:25:53 -070016
17import common
18
J. Richard Barnetteb592fbc2014-04-02 10:27:33 -070019from autotest_lib.frontend.afe.json_rpc import proxy
Fang Denge3bc24b2014-03-17 15:19:46 -070020from autotest_lib.client.common_lib import enum
Dan Shidfea3682014-08-10 23:38:40 -070021from autotest_lib.client.common_lib import error
Simran Basi5ace6f22016-01-06 17:30:44 -080022from autotest_lib.client.common_lib import global_config
Alex Miller7d658cf2013-09-04 16:00:35 -070023from autotest_lib.client.common_lib import priorities
Dan Shidfea3682014-08-10 23:38:40 -070024from autotest_lib.client.common_lib import time_utils
25from autotest_lib.client.common_lib import utils
Xixuan Wueb1acc42017-11-22 15:46:03 -080026from autotest_lib.frontend.afe import model_attributes
Fang Denge3bc24b2014-03-17 15:19:46 -070027from autotest_lib.frontend.afe.json_rpc import proxy
Dan Shi36cfd832014-10-10 13:38:51 -070028from autotest_lib.server.cros import provision
Chris Masone44e4d6c2012-08-15 14:25:53 -070029from autotest_lib.server.cros.dynamic_suite import constants
30from autotest_lib.server.cros.dynamic_suite import control_file_getter
31from autotest_lib.server.cros.dynamic_suite import frontend_wrappers
Alex Miller3a69adc2012-12-19 13:38:31 -080032from autotest_lib.server.cros.dynamic_suite import job_status
Xixuan Wu7cc10e52018-04-25 17:04:51 -070033from autotest_lib.server.cros.dynamic_suite import suite_common
J. Richard Barnettee7b98bb2013-08-21 16:34:16 -070034from autotest_lib.server.cros.dynamic_suite import tools
35from autotest_lib.server.cros.dynamic_suite.job_status import Status
Chris Masone44e4d6c2012-08-15 14:25:53 -070036
Shuqian Zhaoab468812015-04-08 14:40:38 -070037try:
38 from chromite.lib import boolparse_lib
39 from chromite.lib import cros_logging as logging
40except ImportError:
41 print 'Unable to import chromite.'
42 print 'This script must be either:'
43 print ' - Be run in the chroot.'
44 print ' - (not yet supported) be run after running '
45 print ' ../utils/build_externals.py'
Fang Denge3bc24b2014-03-17 15:19:46 -070046
# Module-level list of suite names.
# NOTE(review): presumably the suites for which bugs are filed automatically
# on failure -- confirm against the code that reads this constant.
_FILE_BUG_SUITES = ['au', 'bvt', 'bvt-cq', 'bvt-inline', 'paygen_au_beta',
                    'paygen_au_canary', 'paygen_au_dev', 'paygen_au_stable',
                    'sanity', 'push_to_prod']
# Value of 'drone_installation_directory' in the SCHEDULER section of the
# global config; looked up once, when this module is imported.
_AUTOTEST_DIR = global_config.global_config.get_config_value(
        'SCHEDULER', 'drone_installation_directory')
Xixuan Wu92249042018-04-30 17:17:10 -070052
Shuqian Zhaoe33ba4a2015-09-11 18:51:43 -070053
class RetryHandler(object):
    """Maintain retry information.

    @var _retry_map: A dictionary that stores retry history.
            The key is afe job id. The value is a dictionary.
            {job_id: {'state':RetryHandler.States, 'retry_max':int}}
            - state:
                The retry state of a job.
                NOT_ATTEMPTED:
                    We haven't done anything about the job.
                ATTEMPTED:
                    We've made an attempt to schedule a retry job. The
                    scheduling may or may not be successful, e.g.
                    it might encounter an rpc error. Note failure
                    in scheduling a retry is different from a retry job
                    failure. For each job, we only attempt to schedule a
                    retry once. For example, assume we have a test with
                    JOB_RETRIES=5 and its second retry job failed. When we
                    attempt to create a third retry job to retry the
                    second, we hit an rpc error. In such case, we will give
                    up on all following retries.
                RETRIED:
                    A retry job has already been successfully
                    scheduled.
            - retry_max:
                The maximum of times the job can still
                be retried, taking into account retries
                that have occurred.
    @var _retry_level: A retry might be triggered only if the result
            is worse than the level.
    @var _max_retries: Maximum retry limit at suite level.
                     Regardless how many times each individual test
                     has been retried, the total number of retries
                     happening in the suite can't exceed _max_retries.
    """

    States = enum.Enum('NOT_ATTEMPTED', 'ATTEMPTED', 'RETRIED',
                       start_value=1, step=1)

    def __init__(self, initial_jobs_to_tests, retry_level='WARN',
                 max_retries=None):
        """Initialize RetryHandler.

        @param initial_jobs_to_tests: A dictionary that maps a job id to
                a ControlData object. This dictionary should contain
                jobs that are originally scheduled by the suite.
        @param retry_level: A retry might be triggered only if the result is
                worse than the level.
        @param max_retries: Integer, maximum total retries allowed
                                  for the suite. Default to None, no max.
        """
        self._retry_map = {}
        self._retry_level = retry_level
        # "No limit" is modelled as a very large budget that add_retry()
        # counts down. sys.maxsize is available on Python 2.6+ and 3,
        # unlike sys.maxint which only exists on Python 2.
        self._max_retries = (max_retries
                             if max_retries is not None else sys.maxsize)
        for job_id, test in initial_jobs_to_tests.items():
            if test.job_retries > 0:
                self._add_job(new_job_id=job_id,
                              retry_max=test.job_retries)
            else:
                logging.debug("Test %s has no retries", test.name)


    def _add_job(self, new_job_id, retry_max):
        """Add a newly-created job to the retry map.

        @param new_job_id: The afe_job_id of a newly created job.
        @param retry_max: The maximum of times that we could retry
                          the test if the job fails.

        @raises ValueError if new_job_id is already in retry map.

        """
        if new_job_id in self._retry_map:
            raise ValueError('add_job called when job is already in retry map.')

        self._retry_map[new_job_id] = {
                'state': self.States.NOT_ATTEMPTED,
                'retry_max': retry_max}


    def _suite_max_reached(self):
        """Return whether maximum retry limit for a suite has been reached."""
        return self._max_retries <= 0


    def add_retry(self, old_job_id, new_job_id):
        """Record a retry.

        Update retry map with the retry information.

        @param old_job_id: The afe_job_id of the job that is retried.
        @param new_job_id: The afe_job_id of the retry job.

        @raises KeyError if old_job_id isn't in the retry map.
        @raises ValueError if we have already retried or made an attempt
                to retry the old job, or if new_job_id is already in the
                retry map.

        """
        old_record = self._retry_map[old_job_id]
        if old_record['state'] != self.States.NOT_ATTEMPTED:
            raise ValueError(
                    'We have already retried or attempted to retry job %d' %
                    old_job_id)
        # Register the new job before touching the old record: _add_job()
        # raises ValueError for a duplicate id, and in that case the old job
        # must not be left marked as RETRIED with no retry job recorded.
        self._add_job(new_job_id=new_job_id,
                      retry_max=old_record['retry_max'] - 1)
        old_record['state'] = self.States.RETRIED
        # Every recorded retry consumes one unit of the suite-wide budget.
        self._max_retries -= 1


    def set_attempted(self, job_id):
        """Set the state of the job to ATTEMPTED.

        @param job_id: afe_job_id of a job.

        @raises KeyError if job_id isn't in the retry map.
        @raises ValueError if the current state is not NOT_ATTEMPTED.

        """
        current_state = self._retry_map[job_id]['state']
        if current_state != self.States.NOT_ATTEMPTED:
            # We are supposed to retry or attempt to retry each job
            # only once. Raise an error if this is not the case.
            raise ValueError('Unexpected state transition: %s -> %s' %
                             (self.States.get_string(current_state),
                              self.States.get_string(self.States.ATTEMPTED)))
        else:
            self._retry_map[job_id]['state'] = self.States.ATTEMPTED


    def has_following_retry(self, result):
        """Check whether there will be a following retry.

        We have the following cases for a given job id (result.id),
            - no retry map entry -> retry not required, no following retry
            - has retry map entry:
                - already retried -> has following retry
                - has not retried
                    (this branch can be handled by checking
                     should_retry(result))
                  - retry_max == 0 --> the last retry job, no more retry
                  - retry_max > 0
                   - attempted, but has failed in scheduling a
                     following retry due to rpc error  --> no more retry
                   - has not attempted --> has following retry if test
                     failed.

        @param result: A result, encapsulating the status of the job.

        @returns: True, if there will be a following retry.
                  False otherwise.

        """
        return (result.test_executed
                and result.id in self._retry_map
                and (self._retry_map[result.id]['state'] == self.States.RETRIED
                     or self._should_retry(result)))


    def _should_retry(self, result):
        """Check whether we should retry a job based on its result.

        We will retry the job that corresponds to the result
        when all of the following are true.
        a) The test was actually executed, meaning that if
           a job was aborted before it could ever reach the state
           of 'Running', the job will not be retried.
        b) The result is worse than |self._retry_level| which
           defaults to 'WARN'.
        c) The test requires retry, i.e. the job has an entry in the retry
           map.
        d) We haven't made any retry attempt yet, i.e. state == NOT_ATTEMPTED
           Note that if a test has JOB_RETRIES=5, and the second time
           it was retried it hit an rpc error, we will give up on
           all following retries.
        e) The job has not reached its retry max, i.e. retry_max > 0
        f) The suite-level retry budget has not been used up.

        @param result: A result, encapsulating the status of the job.

        @returns: True if we should retry the job.

        """
        return (
            result.test_executed
            and result.id in self._retry_map
            and not self._suite_max_reached()
            and result.is_worse_than(
                job_status.Status(self._retry_level, '', 'reason'))
            and self._retry_map[result.id]['state'] == self.States.NOT_ATTEMPTED
            and self._retry_map[result.id]['retry_max'] > 0
        )


    def _should_retry_local_job(self, job_id):
        """Check whether we should retry a job based on information available
        for a local job without a Result object.

        We will retry the job when all of the following are true.
        a) The suite-level retry budget has not been used up.
        b) The test requires retry, i.e. the job has an entry in the retry
           map.
        c) We haven't made any retry attempt yet for this job, i.e.
           state == NOT_ATTEMPTED
           If the job is aborted, we will give up on all following retries,
           regardless of max_retries.
        d) The job has not reached its retry max, i.e. retry_max > 0

        @param job_id: the id for the job, to look up relevant information.

        @returns: True if we should retry the job.

        """
        if self._suite_max_reached():
            logging.debug('suite max_retries reached, not retrying.')
            return False
        if job_id not in self._retry_map:
            logging.debug('job_id not in retry map, not retrying.')
            return False
        state = self._retry_map[job_id]['state']
        if state != self.States.NOT_ATTEMPTED:
            # Log the state's name; the raw enum value is just an integer.
            logging.debug("job state was %s not 'Not Attempted', not retrying",
                          self.States.get_string(state))
            return False
        if self._retry_map[job_id]['retry_max'] <= 0:
            logging.debug('test-level retries exhausted, not retrying')
            return False
        return True


    def job_present(self, job_id):
        """Check whether a job id is present in the retry map.

        @param job_id: afe_job_id of a job.

        @returns: True if the job is present, False if not.
        """
        # Test membership directly rather than the truthiness of the record.
        return job_id in self._retry_map


    def get_retry_max(self, job_id):
        """Get the maximum times the job can still be retried.

        @param job_id: afe_job_id of a job.

        @returns: An int, representing the maximum times the job can still be
                  retried.
        @raises KeyError if job_id isn't in the retry map.

        """
        return self._retry_map[job_id]['retry_max']
299
300
class _SuiteChildJobCreator(object):
    """Create test jobs for a suite."""

    def __init__(
            self,
            tag,
            builds,
            board,
            afe=None,
            max_runtime_mins=24*60,
            timeout_mins=24*60,
            suite_job_id=None,
            ignore_deps=False,
            extra_deps=(),
            priority=priorities.Priority.DEFAULT,
            offload_failures_only=False,
            test_source_build=None,
            job_keyvals=None,
    ):
        """
        Constructor

        @param tag: a string with which to tag jobs run in this suite.
        @param builds: the builds on which we're running this suite.
        @param board: the board on which we're running this suite.
        @param afe: an instance of AFE as defined in server/frontend.py.
                    When omitted, a frontend_wrappers.RetryingAFE is created.
        @param max_runtime_mins: Maximum suite runtime, in minutes.
        @param timeout_mins: Maximum job lifetime, in minutes.
        @param suite_job_id: Job id that will act as parent id to all sub jobs.
                             Default: None
        @param ignore_deps: True if jobs should ignore the DEPENDENCIES
                            attribute and skip applying of dependency labels.
                            (Default:False)
        @param extra_deps: A list of strings which are the extra DEPENDENCIES
                           to add to each test being scheduled.
        @param priority: Integer priority level.  Higher is more important.
        @param offload_failures_only: Only enable gs_offloading for failed
                                      jobs.
        @param test_source_build: Build that contains the server-side test code.
        @param job_keyvals: General job keyvals to be inserted into keyval file,
                            which will be used by tko/parse later.
        """
        self._tag = tag
        self._builds = builds
        self._board = board
        self._afe = afe or frontend_wrappers.RetryingAFE(timeout_min=30,
                                                         delay_sec=10,
                                                         debug=False)
        self._max_runtime_mins = max_runtime_mins
        self._timeout_mins = timeout_mins
        self._suite_job_id = suite_job_id
        self._ignore_deps = ignore_deps
        # Snapshot as a tuple so later changes to the caller's sequence do
        # not leak into jobs created afterwards.
        self._extra_deps = tuple(extra_deps)
        self._priority = priority
        self._offload_failures_only = offload_failures_only
        self._test_source_build = test_source_build
        self._job_keyvals = job_keyvals


    @property
    def cros_build(self):
        """Return the CrOS build or the first build in the builds dict.

        @raises IndexError if the builds dict is empty.
        """
        # TODO(ayatane): Note that the builds dict isn't ordered.  I'm not
        # sure what the implications of this are, but it's probably not a
        # good thing.
        if provision.CROS_VERSION_PREFIX in self._builds:
            return self._builds[provision.CROS_VERSION_PREFIX]
        # list(...) keeps this working on Python 3, where values() is a view
        # that cannot be indexed, and still raises IndexError when empty.
        return list(self._builds.values())[0]


    def create_job(self, test, retry_for=None):
        """
        Thin wrapper around frontend.AFE.create_job().

        @param test: ControlData object for a test to run.
        @param retry_for: If the to-be-created job is a retry for an
                          old job, the afe_job_id of the old job will
                          be passed in as |retry_for|, which will be
                          recorded in the new job's keyvals.
        @returns: A frontend.Job object with an added test_name member.
                  test_name is used to preserve the higher level TEST_NAME
                  name of the job.
        """
        # For a system running multiple suites which share tests, the priority
        # overridden may lead to unexpected scheduling order that adds extra
        # provision jobs. So the per-test priority is only honoured on moblab.
        test_priority = self._priority
        if utils.is_moblab():
            test_priority = max(self._priority, test.priority)

        # Tests flagged as fast skip both the pre-job reboot and the reset.
        reboot_before = (model_attributes.RebootBefore.NEVER if test.fast
                         else None)

        test_obj = self._afe.create_job(
            control_file=test.text,
            name=tools.create_job_name(
                    self._test_source_build or self.cros_build,
                    self._tag,
                    test.name),
            control_type=test.test_type.capitalize(),
            meta_hosts=[self._board]*test.sync_count,
            dependencies=self._create_job_deps(test),
            keyvals=self._create_keyvals_for_test_job(test, retry_for),
            max_runtime_mins=self._max_runtime_mins,
            timeout_mins=self._timeout_mins,
            parent_job_id=self._suite_job_id,
            reboot_before=reboot_before,
            run_reset=not test.fast,
            priority=test_priority,
            synch_count=test.sync_count,
            require_ssp=test.require_ssp)

        test_obj.test_name = test.name
        return test_obj


    def _create_job_deps(self, test):
        """Create job deps list for a test job.

        @param test: ControlData object for a test to run.
        @returns: A list of dependency strings.
        """
        if self._ignore_deps:
            job_deps = []
        else:
            job_deps = list(test.dependencies)
            job_deps.extend(self._extra_deps)
        return job_deps


    def _create_keyvals_for_test_job(self, test, retry_for=None):
        """Create keyvals dict for creating a test job.

        @param test: ControlData object for a test to run.
        @param retry_for: If the to-be-created job is a retry for an
                          old job, the afe_job_id of the old job will
                          be passed in as |retry_for|, which will be
                          recorded in the new job's keyvals.
        @returns: A keyvals dict for creating the test job.
        """
        keyvals = {
            constants.JOB_BUILD_KEY: self.cros_build,
            constants.JOB_SUITE_KEY: self._tag,
            constants.JOB_EXPERIMENTAL_KEY: test.experimental,
            constants.JOB_BUILDS_KEY: self._builds
        }
        # test_source_build is saved to job_keyvals so scheduler can retrieve
        # the build name from database when compiling autoserv commandline.
        # This avoid a database change to add a new field in afe_jobs.
        #
        # Only add `test_source_build` to job keyvals if the build is different
        # from the CrOS build or the job uses more than one build, e.g., both
        # firmware and CrOS will be updated in the dut.
        # This is for backwards compatibility, so the update Autotest code can
        # compile an autoserv command line to run in a SSP container using
        # previous builds.
        if (self._test_source_build and
            (self.cros_build != self._test_source_build or
             len(self._builds) > 1)):
            keyvals[constants.JOB_TEST_SOURCE_BUILD_KEY] = (
                    self._test_source_build)
            # items() (not iteritems()) so this also runs on Python 3.
            for prefix, build in self._builds.items():
                if prefix == provision.FW_RW_VERSION_PREFIX:
                    keyvals[constants.FWRW_BUILD] = build
                elif prefix == provision.FW_RO_VERSION_PREFIX:
                    keyvals[constants.FWRO_BUILD] = build
        # Add suite job id to keyvals so tko parser can read it from keyval
        # file.
        if self._suite_job_id:
            keyvals[constants.PARENT_JOB_ID] = self._suite_job_id
        # We drop the old job's id in the new job's keyval file so that
        # later our tko parser can figure out the retry relationship and
        # invalidate the results of the old job in tko database.
        if retry_for:
            keyvals[constants.RETRY_ORIGINAL_JOB_ID] = retry_for
        if self._offload_failures_only:
            keyvals[constants.JOB_OFFLOAD_FAILURES_KEY] = True
        # Only the keys listed in constants.INHERITED_KEYVALS are copied from
        # the suite-level keyvals into the child job.
        if self._job_keyvals:
            for key in constants.INHERITED_KEYVALS:
                if key in self._job_keyvals:
                    keyvals[key] = self._job_keyvals[key]
        return keyvals
481
482
class _ControlFileRetriever(object):
    """Retrieves control files.

    This returns control data instances, unlike control file getters
    which simply return the control file text contents.
    """

    def __init__(self, cf_getter, forgiving_parser=True, run_prod_code=False,
                 test_args=None):
        """Initialize instance.

        @param cf_getter: a control_file_getter.ControlFileGetter used to list
               and fetch the content of control files
        @param forgiving_parser: If False, will raise ControlVariableExceptions
                                 if any are encountered when parsing control
                                 files. Note that this can raise an exception
                                 for syntax errors in unrelated files, because
                                 we parse them before applying the predicate.
        @param run_prod_code: If true, the retrieved tests will run the test
                              code that lives in prod aka the test code
                              currently on the lab servers by disabling
                              SSP for the discovered tests.
        @param test_args: A dict of args to be seeded in test control file under
                          the name |args_dict|.
        """
        self._cf_getter = cf_getter
        self._forgiving_parser = forgiving_parser
        self._run_prod_code = run_prod_code
        self._test_args = test_args


    def retrieve_for_test(self, test_name):
        """Retrieve a test's control data.

        This ignores forgiving_parser because we cannot return a
        forgiving value.

        @param test_name: Name of test to retrieve.

        @raises ControlVariableException: There is a syntax error in a
                                          control file.

        @returns a ControlData object
        """
        return suite_common.retrieve_control_data_for_test(
                self._cf_getter, test_name)


    def retrieve_for_suite(self, suite_name=''):
        """Scan through all tests and find all tests.

        @param suite_name: If specified, this method will attempt to restrain
                           the search space to just this suite's control files.

        @raises ControlVariableException: If forgiving_parser is False and there
                                          is a syntax error in a control file.

        @returns a dictionary of ControlData objects that based on given
                 parameters.
        """
        tests = suite_common.retrieve_for_suite(
                self._cf_getter, suite_name, self._forgiving_parser,
                self._test_args)
        if self._run_prod_code:
            # values() (not itervalues()) so this also runs on Python 3.
            for test in tests.values():
                test.require_ssp = False

        return tests
Allen Li1abded52017-03-10 16:37:57 -0800551
552
def list_all_suites(build, devserver, cf_getter=None):
    """
    Collect every suite name mentioned by any control file.

    All ControlData objects are parsed and the names found in their SUITE
    tags are gathered, without duplicates.

    @param build: the build on which we're running this suite.
    @param devserver: the devserver which contains the build.
    @param cf_getter: control_file_getter.ControlFileGetter. Defaults to
                      using DevServerGetter.

    @return list of suites
    """
    getter = cf_getter
    if getter is None:
        getter = _create_ds_getter(build, devserver)

    def _accept_all(_control_data):
        """Predicate that keeps every test."""
        return True

    suite_names = set()
    for control_data in find_and_parse_tests(getter, _accept_all):
        suite_names.update(control_data.suite_tag_parts)
    return list(suite_names)
573
574
def test_file_similarity_predicate(test_file_pattern):
    """Returns predicate that gets the similarity based on a test's file
    name pattern.

    Builds a predicate that takes in a parsed control file (a ControlData)
    and returns a tuple of (file path, ratio), where ratio is the
    similarity between the test file name and the given test_file_pattern.

    @param test_file_pattern: regular expression (string) to match against
                              control file names.
    @return a callable that takes a ControlData and returns a tuple of
            (file path, ratio), where ratio is the similarity between the
            test file name and the given test_file_pattern. A ControlData
            without a 'path' attribute yields (None, 0).
    """
    def _similarity(control_data):
        """Score one ControlData's path against |test_file_pattern|."""
        if not hasattr(control_data, 'path'):
            return (None, 0)
        matcher = difflib.SequenceMatcher(a=control_data.path,
                                          b=test_file_pattern)
        return (control_data.path, matcher.ratio())

    return _similarity


# This is a predicate factory, not a test case; the test_ prefix would
# otherwise make pytest/nose-style collectors try to run it.
test_file_similarity_predicate.__test__ = False
592
593
def test_name_similarity_predicate(test_name):
    """Returns predicate that scores similarity based on a test's name.

    Builds a predicate that takes in a parsed control file (a ControlData)
    and returns a tuple of (test name, ratio), where ratio is the similarity
    between the test name and the given test_name.

    @param test_name: the test name to base the predicate on.
    @return a callable that takes a ControlData and returns a tuple of
            (test name, ratio), where ratio is the similarity between the
            test name and the given test_name. A ControlData without a
            'name' attribute yields (None, 0).
    """
    def _similarity(control_data):
        """Score one ControlData's name against |test_name|."""
        if not hasattr(control_data, 'name'):
            return (None, 0)
        matcher = difflib.SequenceMatcher(a=control_data.name, b=test_name)
        return (control_data.name, matcher.ratio())

    return _similarity


# This is a predicate factory, not a test case; the test_ prefix would
# otherwise make pytest/nose-style collectors try to run it.
test_name_similarity_predicate.__test__ = False
609
610
def matches_attribute_expression_predicate(test_attr_boolstr):
    """Returns predicate that matches based on boolean expression of
    attributes.

    The predicate takes a parsed control file (a ControlData) and hands its
    attributes, together with the given expression, to
    boolparse_lib.BoolstrResult.

    @param test_attr_boolstr: boolean expression of the attributes to be
                              test, like 'system:all and interval:daily'.

    @return a callable that takes a ControlData and returns True if the test
            attributes satisfy the given boolean expression.
    """
    def _attributes_match(control_data):
        """Evaluate |test_attr_boolstr| against one ControlData."""
        return boolparse_lib.BoolstrResult(test_attr_boolstr,
                                           control_data.attributes)

    return _attributes_match
627
628
def test_file_matches_pattern_predicate(test_file_pattern):
    """Returns predicate that matches based on a test's file name pattern.

    Builds a predicate that takes in a parsed control file (a ControlData)
    and returns True if the test's control file name matches the given
    regular expression. The pattern is applied with re.match, i.e. it is
    anchored at the start of the path.

    @param test_file_pattern: regular expression (string) to match against
                              control file names.
    @return a callable that takes a ControlData and returns
            True if control file name matches the pattern, False otherwise
            (including when the ControlData has no 'path' attribute).
    """
    def _matches(control_data):
        """Check one ControlData's path against |test_file_pattern|."""
        if not hasattr(control_data, 'path'):
            return False
        # bool() so callers get the documented True/False rather than a
        # match object or None.
        return bool(re.match(test_file_pattern, control_data.path))

    return _matches


# This is a predicate factory, not a test case; the test_ prefix would
# otherwise make pytest/nose-style collectors try to run it.
test_file_matches_pattern_predicate.__test__ = False
643
644
def test_name_matches_pattern_predicate(test_name_pattern):
    """Returns predicate that matches based on a test's name pattern.

    Builds a predicate that takes in a parsed control file (a ControlData)
    and returns True if the test name matches the given regular expression.
    The pattern is applied with re.match, i.e. it is anchored at the start
    of the name.

    @param test_name_pattern: regular expression (string) to match against
                              test names.
    @return a callable that takes a ControlData and returns
            True if the name field matches the pattern, False otherwise
            (including when the ControlData has no 'name' attribute).
    """
    def _matches(control_data):
        """Check one ControlData's name against |test_name_pattern|."""
        if not hasattr(control_data, 'name'):
            return False
        # bool() so callers get the documented True/False rather than a
        # match object or None.
        return bool(re.match(test_name_pattern, control_data.name))

    return _matches


# This is a predicate factory, not a test case; the test_ prefix would
# otherwise make pytest/nose-style collectors try to run it.
test_name_matches_pattern_predicate.__test__ = False
658
659
def test_name_equals_predicate(test_name):
    """Returns predicate that matches based on a test's exact name.

    Builds a predicate that takes in a parsed control file (a ControlData)
    and returns True if the test name is equal to |test_name|.

    @param test_name: the test name to base the predicate on.
    @return a callable that takes a ControlData and returns True if its
            name equals |test_name|, False otherwise (including when the
            ControlData has no 'name' attribute).
    """
    def _name_equals(control_data):
        """Compare one ControlData's name with |test_name|."""
        return hasattr(control_data, 'name') and test_name == control_data.name

    return _name_equals


# This is a predicate factory, not a test case; the test_ prefix would
# otherwise make pytest/nose-style collectors try to run it.
test_name_equals_predicate.__test__ = False
671
672
def name_in_tag_similarity_predicate(name):
    """Returns predicate that scores a control file's suites against a name.

    The predicate takes a parsed control file (a ControlData) and compares
    every suite listed in its suite_tag_parts with |name|.

    @param name: the suite name to base the predicate on.
    @return a callable that takes a ControlData and returns a list of tuples
            of (suite name, ratio), where suite name is each suite listed in
            the control file, and ratio is the similarity between each suite
            and the given name. When no suite is listed the result is
            [(None, 0)].
    """
    def _suite_similarities(control_data):
        """Score each suite of one ControlData against |name|."""
        scored = []
        for suite in control_data.suite_tag_parts:
            matcher = difflib.SequenceMatcher(a=suite, b=name)
            scored.append((suite, matcher.ratio()))
        if not scored:
            return [(None, 0)]
        return scored

    return _suite_similarities
691
692
def name_in_tag_predicate(name):
    """Returns predicate that takes a control file and looks for |name|.

    This simply forwards to suite_common.name_in_tag_predicate, which builds
    the predicate over a parsed control file (a ControlData).

    @param name: the suite name to base the predicate on.
    @return a callable that takes a ControlData and looks for |name| in that
            ControlData object's suite member.
    """
    predicate = suite_common.name_in_tag_predicate(name)
    return predicate
Allen Li398ddbd2017-03-09 15:44:25 -0800704
705
def create_fs_getter(autotest_dir):
    """
    Build a control file getter that reads from the local file system.

    @param autotest_dir: the place to find autotests.
    @return a FileSystemGetter instance that looks under |autotest_dir|.
    """
    # currently hard-coded places to look for tests.
    search_dirs = []
    for subpath in ('server/site_tests', 'client/site_tests',
                    'server/tests', 'client/tests'):
        search_dirs.append(os.path.join(autotest_dir, subpath))
    return control_file_getter.FileSystemGetter(search_dirs)
716
717
def _create_ds_getter(build, devserver):
    """Create a control file getter backed by a devserver.

    @param build: the build on which we're running this suite.
    @param devserver: the devserver which contains the build.
    @return a DevServerGetter instance constructed from |build| and
            |devserver|.
    """
    return control_file_getter.DevServerGetter(build, devserver)
725
726
Allen Li3adae952017-03-10 17:18:12 -0800727def _non_experimental_tests_predicate(test_data):
728 """Test predicate for non-experimental tests."""
729 return not test_data.experimental
730
731
def find_and_parse_tests(cf_getter, predicate, suite_name='',
                         add_experimental=False, forgiving_parser=True,
                         run_prod_code=False, test_args=None):
    """
    Scan through all tests and return those that are eligible to run.

    Control files are retrieved for |suite_name| through a
    _ControlFileRetriever built from |cf_getter|, then filtered with
    |predicate| by suite_common.filter_tests. Unless |add_experimental| is
    set, |predicate| is first composed with a check that rejects
    experimental tests.

    @param cf_getter: a control_file_getter.ControlFileGetter used to list
                      and fetch the content of control files
    @param predicate: a function that should return True when run over a
                      ControlData representation of a control file that
                      should be in this Suite.
    @param suite_name: If specified, this method will attempt to restrain
                       the search space to just this suite's control files.
    @param add_experimental: add tests with experimental attribute set.
    @param forgiving_parser: If False, will raise ControlVariableExceptions
                             if any are encountered when parsing control
                             files. Note that this can raise an exception
                             for syntax errors in unrelated files, because
                             we parse them before applying the predicate.
    @param run_prod_code: If true, the suite will run the test code that
                          lives in prod aka the test code currently on the
                          lab servers by disabling SSP for the discovered
                          tests.
    @param test_args: A dict of args to be seeded in test control file.

    @raises ControlVariableException: If forgiving_parser is False and there
                                      is a syntax error in a control file.

    @return list of ControlData objects that should be run, with control
            file text added in |text| attribute. Historically documented
            as sorted on the TIME setting in the control file, slowest
            test first; the ordering is decided by
            suite_common.filter_tests -- TODO confirm there.
    """
    logging.debug('Getting control file list for suite: %s', suite_name)
    control_files = _ControlFileRetriever(
            cf_getter,
            forgiving_parser=forgiving_parser,
            run_prod_code=run_prod_code,
            test_args=test_args).retrieve_for_suite(suite_name)
    if add_experimental:
        effective_predicate = predicate
    else:
        # Experimental tests are excluded unless explicitly requested.
        effective_predicate = _ComposedPredicate(
                [predicate, _non_experimental_tests_predicate])
    return suite_common.filter_tests(control_files, effective_predicate)
Allen Li0b1fa382017-02-28 18:47:16 -0800778
779
def find_possible_tests(cf_getter, predicate, suite_name='', count=10):
    """
    Scan through all tests and suggest the names closest to a target.

    Control files are retrieved for |suite_name| through a
    _ControlFileRetriever built from |cf_getter|. |predicate| is run over
    every retrieved control file to obtain (name, ratio) pairs, and the
    |count| names with the highest ratio are returned.

    @param cf_getter: a control_file_getter.ControlFileGetter used to list
                      and fetch the content of control files
    @param predicate: a function that, when run over a ControlData
                      representation of a control file, returns either a
                      tuple of (name, ratio) or a list of such tuples.
                      `name` is the key to be compared, e.g., a suite name
                      or test name. `ratio` is a value between [0,1]
                      indicating the similarity of `name` and the value to
                      be compared.
    @param suite_name: If specified, this method will attempt to restrain
                       the search space to just this suite's control files.
    @param count: Number of suggestions to return, default to 10.

    @return list of at most |count| names, sorted by match ratio, best
            match first.
    """
    logging.debug('Getting control file list for suite: %s', suite_name)
    retriever = _ControlFileRetriever(cf_getter)
    tests = retriever.retrieve_for_suite(suite_name)
    logging.debug('Parsed %s control files.', len(tests))

    ratio_by_name = {}
    for control_data in tests.itervalues():
        outcome = predicate(control_data)
        # Some predicates may return a list of tuples, e.g.,
        # name_in_tag_similarity_predicate. Normalize every return value
        # to a list of (name, ratio) pairs.
        pairs = outcome if isinstance(outcome, list) else [outcome]
        for candidate, score in pairs:
            # A name seen more than once keeps the most recent ratio.
            ratio_by_name[candidate] = score

    ranked = sorted(ratio_by_name.items(), key=operator.itemgetter(1),
                    reverse=True)
    return [entry[0] for entry in ranked][:count]
817
818
Allen Li98a26a42017-02-28 18:43:24 -0800819def _deprecated_suite_method(func):
820 """Decorator for deprecated Suite static methods.
821
822 TODO(ayatane): This is used to decorate functions that are called as
823 static methods on Suite.
824 """
825 @functools.wraps(func)
826 def wrapper(*args, **kwargs):
Shuhei Takahashif8659c62017-06-14 20:02:26 +0900827 """Wraps |func| for warning."""
Brian Norris32add5d2017-06-19 11:09:54 -0700828 warnings.warn('Calling method "%s" from Suite is deprecated' %
829 func.__name__)
Allen Li98a26a42017-02-28 18:43:24 -0800830 return func(*args, **kwargs)
831 return staticmethod(wrapper)
832
833
class _BaseSuite(object):
    """
    A suite of tests that are scheduled as child jobs and then waited on.

    Given an iterable of tests, can fire off jobs to run them, and then
    wait for results, retrying and reporting failures as configured.

    @var tests: list of the tests to run.
    @var wait_for_results: value of the constructor argument of the same
                           name; stored here but not read by this class.
    @var _tag: a string with which to tag jobs run in this suite.
    @var _builds: the builds on which we're running this suite.
    @var _results_dir: directory keyvals are written to, or None.
    @var _afe: an instance of AFE as defined in server/frontend.py.
    @var _tko: an instance of TKO as defined in server/frontend.py.
    @var _jobs: currently scheduled jobs, if any.
    @var _jobs_to_tests: a dictionary that maps job ids to tests represented
                         ControlData objects.
    @var _file_bugs: whether failed results may be reported at all.
    @var _suite_job_id: job id acting as parent of all child jobs, or None.
    @var _job_retry: a bool value indicating whether jobs should be retried
                     on failure.
    @var _max_retries: suite-level retry limit handed to the RetryHandler.
    @var _retry_handler: a RetryHandler object; None until schedule() has
                         run with |_job_retry| set.
    @var _job_keyvals: general job keyvals written by schedule(), or None.
    @var _result_reporter: object whose report() is called for results that
                           should be reported.
    @var _dependencies: tuple of |extra_deps| followed by the board, the
                        pool (if any) and |child_dependencies|.
    @var _job_creator: _SuiteChildJobCreator used to create child jobs.
    """


    def __init__(
            self,
            tests,
            tag,
            builds,
            board,
            afe=None,
            tko=None,
            pool=None,
            results_dir=None,
            max_runtime_mins=24*60,
            timeout_mins=24*60,
            file_bugs=False,
            suite_job_id=None,
            ignore_deps=False,
            extra_deps=None,
            priority=priorities.Priority.DEFAULT,
            wait_for_results=True,
            job_retry=False,
            max_retries=sys.maxint,
            offload_failures_only=False,
            test_source_build=None,
            job_keyvals=None,
            child_dependencies=(),
            result_reporter=None,
    ):
        """Initialize instance.

        @param tests: Iterable of tests to run.
        @param tag: a string with which to tag jobs run in this suite.
        @param builds: the builds on which we're running this suite.
        @param board: the board on which we're running this suite.
        @param afe: an instance of AFE as defined in server/frontend.py.
        @param tko: an instance of TKO as defined in server/frontend.py.
        @param pool: Specify the pool of machines to use for scheduling
                purposes.
        @param results_dir: The directory where the job can write results to.
                            This must be set if you want job_id of sub-jobs
                            list in the job keyvals.
        @param max_runtime_mins: Maximum suite runtime, in minutes.
        @param timeout_mins: Maximum job lifetime, in minutes.
        @param file_bugs: True if results that qualify (see _should_report)
                          should be handed to the result reporter.
                          (Default:False)
        @param suite_job_id: Job id that will act as parent id to all sub jobs.
                             Default: None
        @param ignore_deps: True if jobs should ignore the DEPENDENCIES
                            attribute and skip applying of dependency labels.
                            (Default:False)
        @param extra_deps: A list of strings which are the extra DEPENDENCIES
                           to add to each test being scheduled. The list is
                           copied; the caller's object is not modified.
        @param priority: Integer priority level.  Higher is more important.
        @param wait_for_results: Set to False to run the suite job without
                                 waiting for test jobs to finish. Default is
                                 True.
        @param job_retry: A bool value indicating whether jobs should be retried
                          on failure. If True, the field 'JOB_RETRIES' in
                          control files will be respected. If False, do not
                          retry.
        @param max_retries: Maximum retry limit at suite level.
                            Regardless how many times each individual test
                            has been retried, the total number of retries
                            happening in the suite can't exceed _max_retries.
                            Default to sys.maxint.
        @param offload_failures_only: Only enable gs_offloading for failed
                                      jobs.
        @param test_source_build: Build that contains the server-side test code.
        @param job_keyvals: General job keyvals to be inserted into keyval file,
                            which will be used by tko/parse later.
        @param child_dependencies: (optional) list of dependency strings
                to be added as dependencies to child jobs.
        @param result_reporter: A _ResultReporter instance to report results. If
                None, an _EmailReporter will be created.
        """

        self.tests = list(tests)
        self._tag = tag
        self._builds = builds
        self._results_dir = results_dir
        self._afe = afe or frontend_wrappers.RetryingAFE(timeout_min=30,
                                                         delay_sec=10,
                                                         debug=False)
        self._tko = tko or frontend_wrappers.RetryingTKO(timeout_min=30,
                                                         delay_sec=10,
                                                         debug=False)
        self._jobs = []
        self._jobs_to_tests = {}

        self._file_bugs = file_bugs
        self._suite_job_id = suite_job_id
        self._job_retry = job_retry
        self._max_retries = max_retries
        # RetryHandler to be initialized in schedule()
        self._retry_handler = None
        self.wait_for_results = wait_for_results
        self._job_keyvals = job_keyvals
        if result_reporter is None:
            self._result_reporter = _EmailReporter(self)
        else:
            self._result_reporter = result_reporter

        # Work on a private copy so that the caller's |extra_deps| list is
        # not extended with the board, pool and child dependencies.
        extra_deps = [] if extra_deps is None else list(extra_deps)
        extra_deps.append(board)
        if pool:
            extra_deps.append(pool)
        extra_deps.extend(child_dependencies)
        self._dependencies = tuple(extra_deps)

        # NOTE: the caller-supplied |afe| (possibly None) is forwarded
        # here, not self._afe.
        self._job_creator = _SuiteChildJobCreator(
            tag=tag,
            builds=builds,
            board=board,
            afe=afe,
            max_runtime_mins=max_runtime_mins,
            timeout_mins=timeout_mins,
            suite_job_id=suite_job_id,
            ignore_deps=ignore_deps,
            extra_deps=extra_deps,
            priority=priority,
            offload_failures_only=offload_failures_only,
            test_source_build=test_source_build,
            job_keyvals=job_keyvals,
        )


    def _schedule_test(self, record, test, retry_for=None):
        """Schedule a single test and return the job.

        Schedule a single test by creating a job, and then update relevant
        data structures that are used to keep track of all running jobs.

        Emits a TEST_NA status log entry if it failed to schedule the test due
        to NoEligibleHostException or a non-existent board label.

        Returns a frontend.Job object if the test is successfully scheduled.
        If scheduling failed due to NoEligibleHostException or a non-existent
        board label, returns None.

        @param record: A callable to use for logging.
                       prototype: record(base_job.status_log_entry)
        @param test: ControlData for a test to run.
        @param retry_for: If we are scheduling a test to retry an
                          old job, the afe_job_id of the old job
                          will be passed in as |retry_for|.

        @raises error.RPCException, proxy.JSONRPCException: re-raised from
                job creation; when |retry_for| is set the old job is first
                marked as attempted on the retry handler.
        @raises proxy.ValidationError: re-raised when it is not a
                non-existent-board error.

        @returns: A frontend.Job object or None
        """
        msg = 'Scheduling %s' % test.name
        if retry_for:
            msg = msg + ', to retry afe job %d' % retry_for
        logging.debug(msg)
        begin_time_str = datetime.datetime.now().strftime(time_utils.TIME_FMT)
        try:
            job = self._job_creator.create_job(test, retry_for=retry_for)
        except (error.NoEligibleHostException, proxy.ValidationError) as e:
            if (isinstance(e, error.NoEligibleHostException)
                or (isinstance(e, proxy.ValidationError)
                    and _is_nonexistent_board_error(e))):
                # Treat a dependency on a non-existent board label the same as
                # a dependency on a board that exists, but for which there's no
                # hardware.
                logging.debug('%s not applicable for this board/pool. '
                              'Emitting TEST_NA.', test.name)
                Status('TEST_NA', test.name,
                       'Skipping:  test not supported on this board/pool.',
                       begin_time_str=begin_time_str).record_all(record)
                return None
            else:
                # Bare raise keeps the original traceback intact.
                raise
        except (error.RPCException, proxy.JSONRPCException):
            if retry_for:
                # Mark that we've attempted to retry the old job.
                logging.debug("RPC exception occurred")
                self._retry_handler.set_attempted(job_id=retry_for)
            raise
        else:
            self._jobs.append(job)
            self._jobs_to_tests[job.id] = test
            if retry_for:
                # A retry job was just created, record it.
                self._retry_handler.add_retry(
                        old_job_id=retry_for, new_job_id=job.id)
                retry_count = (test.job_retries -
                               self._retry_handler.get_retry_max(job.id))
                logging.debug('Job %d created to retry job %d. '
                              'Have retried for %d time(s)',
                              job.id, retry_for, retry_count)
            self._remember_job_keyval(job)
            return job


    def schedule(self, record):
        """
        Schedule jobs using |self._afe|.

        frontend.Job objects representing each scheduled job will be put in
        |self._jobs|.

        Any exception raised while scheduling is logged and recorded as a
        FAIL status for the suite instead of being propagated. Afterwards
        the RetryHandler is created if retries are enabled.

        @param record: A callable to use for logging.
                       prototype: record(base_job.status_log_entry)
        @returns: The number of tests that were scheduled.
        """
        scheduled_test_names = []
        logging.debug('Discovered %d tests.', len(self.tests))

        Status('INFO', 'Start %s' % self._tag).record_result(record)
        try:
            # Write job_keyvals into keyval file.
            if self._job_keyvals:
                utils.write_keyval(self._results_dir, self._job_keyvals)

            # TODO(crbug.com/730885): This is a hack to protect tests that are
            # not usually retried from getting hit by a provision error when run
            # as part of a suite. Remove this hack once provision is separated
            # out in its own suite.
            self._bump_up_test_retries(self.tests)
            for test in self.tests:
                scheduled_job = self._schedule_test(record, test)
                if scheduled_job is not None:
                    scheduled_test_names.append(test.name)

            # Write the num of scheduled tests and name of them to keyval file.
            logging.debug('Scheduled %d tests, writing the total to keyval.',
                          len(scheduled_test_names))
            utils.write_keyval(
                self._results_dir,
                self._make_scheduled_tests_keyvals(scheduled_test_names))
        except Exception:
            # Deliberately best-effort: the failure is surfaced through the
            # log and the suite status rather than by raising.
            logging.exception('Exception while scheduling suite')
            Status('FAIL', self._tag,
                   'Exception while scheduling suite').record_result(record)

        if self._job_retry:
            logging.debug("Initializing RetryHandler for suite %s.", self._tag)
            self._retry_handler = RetryHandler(
                    initial_jobs_to_tests=self._jobs_to_tests,
                    max_retries=self._max_retries)
            logging.debug("retry map created: %s ",
                          self._retry_handler._retry_map)
        else:
            logging.info("Will not retry jobs from suite %s.", self._tag)
        return len(scheduled_test_names)


    def _bump_up_test_retries(self, tests):
        """Bump up individual test retries to match suite retry options.

        Does nothing unless |_job_retry| is set. Otherwise every test whose
        |job_retries| is None gets it set to 1.

        @param tests: Iterable of tests (ControlData objects) to update in
                      place.
        """
        if not self._job_retry:
            return

        for test in tests:
            # We do honor if a test insists on JOB_RETRIES = 0.
            if test.job_retries is None:
                logging.debug(
                        'Test %s did not request retries, but suite requires '
                        'retries. Bumping retries up to 1. '
                        '(See crbug.com/730885)',
                        test.name)
                test.job_retries = 1


    def _make_scheduled_tests_keyvals(self, scheduled_test_names):
        """Make a keyvals dict to write for scheduled test names.

        @param scheduled_test_names: A list of scheduled test name strings.

        @returns: A keyvals dict holding the number of names and the repr()
                  of the list.
        """
        return {
            constants.SCHEDULED_TEST_COUNT_KEY: len(scheduled_test_names),
            constants.SCHEDULED_TEST_NAMES_KEY: repr(scheduled_test_names),
        }


    def _should_report(self, result):
        """
        Returns True if this failure requires to be reported.

        A result is reported only when |_file_bugs| is set, the test was
        executed, it is not a TEST_NA result, and it is worse than a GOOD
        status.

        @param result: A result, encapsulating the status of the failed job.
        @return: True if we should report this failure.
        """
        return (self._file_bugs and result.test_executed and
                not result.is_testna() and
                result.is_worse_than(job_status.Status('GOOD', '', 'reason')))


    def _has_retry(self, result):
        """
        Return True if this result gets to retry.

        @param result: A result, encapsulating the status of the failed job.
        @return: bool
        """
        return (self._job_retry
                and self._retry_handler.has_following_retry(result))


    def wait(self, record):
        """
        Polls for the job statuses, using |record| to print status when each
        completes.

        Any exception raised while waiting is logged and recorded as a FAIL
        status for the suite instead of being propagated.

        @param record: callable that records job status.
                 prototype:
                   record(base_job.status_log_entry)
        """
        waiter = job_status.JobResultWaiter(self._afe, self._tko)
        try:
            if self._suite_job_id:
                jobs = self._afe.get_jobs(parent_job_id=self._suite_job_id)
            else:
                logging.warning('Unknown suite_job_id, falling back to less '
                                'efficient results_generator.')
                jobs = self._jobs
            waiter.add_jobs(jobs)
            for result in waiter.wait_for_results():
                self._handle_result(result=result, record=record, waiter=waiter)
                if self._finished_waiting():
                    break
        except Exception:  # pylint: disable=W0703
            logging.exception('Exception waiting for results')
            Status('FAIL', self._tag,
                   'Exception waiting for results').record_result(record)


    def _finished_waiting(self):
        """Return whether the suite is finished waiting for child jobs.

        This implementation always returns False, so wait() keeps consuming
        results until the waiter itself stops yielding them.
        """
        return False


    def _handle_result(self, result, record, waiter):
        """
        Handle a test job result.

        Records the result, schedules a retry when the retry handler asks
        for one, and otherwise passes the result to |_result_reporter| if
        _should_report() accepts it.

        @param result: Status instance for job.
        @param record: callable that records job status.
                 prototype:
                   record(base_job.status_log_entry)
        @param waiter: JobResultsWaiter instance.
        """
        self._record_result(result, record)
        rescheduled = False
        if self._job_retry and self._retry_handler._should_retry(result):
            rescheduled = self._retry_result(result, record, waiter)
        # TODO (crbug.com/751428): If the suite times out before a retry could
        # finish, we would lose the chance to report errors from the original
        # job.
        if self._has_retry(result) and rescheduled:
            return

        if self._should_report(result):
            self._result_reporter.report(result)


    def _record_result(self, result, record):
        """
        Record a test job result.

        @param result: Status instance for job.
        @param record: callable that records job status.
                 prototype:
                   record(base_job.status_log_entry)
        """
        result.record_all(record)
        self._remember_job_keyval(result)


    def _retry_result(self, result, record, waiter):
        """
        Retry a test job result.

        @param result: Status instance for job.
        @param record: callable that records job status.
                 prototype:
                   record(base_job.status_log_entry)
        @param waiter: JobResultsWaiter instance.
        @returns: True if a job was scheduled for retry, False otherwise.
        """
        test = self._jobs_to_tests[result.id]
        try:
            # It only takes effect for CQ retriable job:
            #   1) in first try, test.fast=True.
            #   2) in second try, test will be run in normal mode, so reset
            #       test.fast=False.
            test.fast = False
            new_job = self._schedule_test(
                    record=record, test=test, retry_for=result.id)
        except (error.RPCException, proxy.JSONRPCException) as e:
            logging.error('Failed to schedule test: %s, Reason: %s',
                          test.name, e)
            return False
        else:
            # _schedule_test returns None when the test was skipped
            # (TEST_NA); there is no job to wait on in that case.
            if new_job is not None:
                waiter.add_job(new_job)
            return bool(new_job)


    @property
    def jobs(self):
        """Give a copy of the associated jobs

        @returns: a new list holding the jobs scheduled so far."""
        return list(self._jobs)


    @property
    def _should_file_bugs(self):
        """Return whether bugs should be filed.

        @returns: bool
        """
        # File bug when failure is one of the _FILE_BUG_SUITES,
        # otherwise send an email to the owner and cc.
        return self._tag in _FILE_BUG_SUITES


    def abort(self):
        """
        Abort all scheduled test jobs.

        Does nothing when no jobs have been scheduled.
        """
        if self._jobs:
            job_ids = [job.id for job in self._jobs]
            self._afe.run('abort_host_queue_entries', job__id__in=job_ids)


    def _remember_job_keyval(self, job):
        """
        Record provided job as a suite job keyval, for later referencing.

        The keyval maps the md5 hex digest of the test name to
        '<job id>-<owner>'. Nothing is written unless |_results_dir| and
        the job's id, owner and test_name are all truthy.

        @param job: some representation of a job that has the attributes:
                    id, test_name, and owner
        """
        if self._results_dir and job.id and job.owner and job.test_name:
            job_id_owner = '%s-%s' % (job.id, job.owner)
            logging.debug('Adding job keyval for %s=%s',
                          job.test_name, job_id_owner)
            utils.write_keyval(
                self._results_dir,
                {hashlib.md5(job.test_name).hexdigest(): job_id_owner})
1293
Dan Shid1521802013-05-24 13:08:37 -07001294
class Suite(_BaseSuite):
    """
    A suite of tests, defined by some predicate over control file variables.

    Given a place to search for control files and a predicate to match the
    desired tests, can gather tests and fire off jobs to run them, and then
    wait for results.

    @var _predicate: a function that should return True when run over a
         ControlData representation of a control file that should be in
         this Suite.
    @var _tag: a string with which to tag jobs run in this suite.
    @var _builds: the builds on which we're running this suite.
    @var _afe: an instance of AFE as defined in server/frontend.py.
    @var _tko: an instance of TKO as defined in server/frontend.py.
    @var _jobs: currently scheduled jobs, if any.
    @var _jobs_to_tests: a dictionary that maps job ids to tests represented
                         as ControlData objects.
    @var _cf_getter: a control_file_getter.ControlFileGetter
    @var _retry: a bool value indicating whether jobs should be retried on
                 failure.
    @var _retry_handler: a RetryHandler object.

    """

    # TODO(ayatane): These methods are kept on the Suite class for
    # backward compatibility.
    #
    # Inside the class body the right-hand names resolve to the module-level
    # functions, because the class attribute of the same name is not bound
    # until the assignment completes.
    find_and_parse_tests = _deprecated_suite_method(find_and_parse_tests)
    find_possible_tests = _deprecated_suite_method(find_possible_tests)
    create_fs_getter = _deprecated_suite_method(create_fs_getter)
    name_in_tag_predicate = _deprecated_suite_method(
            suite_common.name_in_tag_predicate)
    name_in_tag_similarity_predicate = _deprecated_suite_method(
            name_in_tag_similarity_predicate)
    test_name_equals_predicate = _deprecated_suite_method(
            test_name_equals_predicate)
    test_name_matches_pattern_predicate = _deprecated_suite_method(
            test_name_matches_pattern_predicate)
    test_file_matches_pattern_predicate = _deprecated_suite_method(
            test_file_matches_pattern_predicate)
    matches_attribute_expression_predicate = _deprecated_suite_method(
            matches_attribute_expression_predicate)
    test_name_similarity_predicate = _deprecated_suite_method(
            test_name_similarity_predicate)
    test_file_similarity_predicate = _deprecated_suite_method(
            test_file_similarity_predicate)
    list_all_suites = _deprecated_suite_method(list_all_suites)
    get_test_source_build = _deprecated_suite_method(
            suite_common.get_test_source_build)


    @classmethod
    def create_from_predicates(cls, predicates, builds, board, devserver,
                               cf_getter=None, name='ad_hoc_suite',
                               run_prod_code=False, **dargs):
        """
        Create a Suite using the given predicate test filters.

        Uses supplied predicate(s) to instantiate a Suite.  Looks for tests in
        |autotest_dir| and will schedule them using |afe|.  Pulls control files
        from the default dev server.  Results will be pulled from |tko| upon
        completion.

        @param predicates: A list of callables that accept ControlData
                           representations of control files. A test will be
                           included in suite if all callables in this list
                           return True on the given control file.
        @param builds: the builds on which we're running this suite. It's a
                       dictionary of version_prefix:build.
        @param board: the board on which we're running this suite.
        @param devserver: the devserver which contains the build.
        @param cf_getter: control_file_getter.ControlFileGetter. When None,
                          a getter is built here: a file-system getter for
                          _AUTOTEST_DIR if run_prod_code is true, otherwise
                          a devserver getter for the test source build.
        @param name: name of suite. Defaults to 'ad_hoc_suite'
        @param run_prod_code: If true, the suite will run the tests that
                              lives in prod aka the test code currently on the
                              lab servers.
        @param **dargs: Any other Suite constructor parameters, as described
                        in Suite.__init__ docstring.
        @return a Suite instance.
        """
        if cf_getter is None:
            if run_prod_code:
                # Module-level create_fs_getter; the deprecated class
                # attribute of the same name is not visible from here.
                cf_getter = create_fs_getter(_AUTOTEST_DIR)
            else:
                build = suite_common.get_test_source_build(builds, **dargs)
                cf_getter = _create_ds_getter(build, devserver)

        return cls(predicates,
                   name, builds, board, cf_getter, run_prod_code, **dargs)


    @classmethod
    def create_from_name(cls, name, builds, board, devserver, cf_getter=None,
                         **dargs):
        """
        Create a Suite using a predicate based on the SUITE control file var.

        Makes a predicate based on |name| and uses it to instantiate a Suite
        that looks for tests in |autotest_dir| and will schedule them using
        |afe|.  Pulls control files from the default dev server.
        Results will be pulled from |tko| upon completion.

        @param name: a value of the SUITE control file variable to search for.
                     It is also used as the tag of the created Suite.
        @param builds: the builds on which we're running this suite. It's a
                       dictionary of version_prefix:build.
        @param board: the board on which we're running this suite.
        @param devserver: the devserver which contains the build.
        @param cf_getter: control_file_getter.ControlFileGetter. Defaults to
                          using DevServerGetter.
        @param **dargs: Any other Suite constructor parameters, as described
                        in Suite.__init__ docstring.
        @return a Suite instance.
        """
        if cf_getter is None:
            build = suite_common.get_test_source_build(builds, **dargs)
            cf_getter = _create_ds_getter(build, devserver)

        return cls([suite_common.name_in_tag_predicate(name)],
                   name, builds, board, cf_getter, **dargs)


    def __init__(
            self,
            predicates,
            tag,
            builds,
            board,
            cf_getter,
            run_prod_code=False,
            afe=None,
            tko=None,
            pool=None,
            results_dir=None,
            max_runtime_mins=24*60,
            timeout_mins=24*60,
            file_bugs=False,
            suite_job_id=None,
            ignore_deps=False,
            extra_deps=None,
            priority=priorities.Priority.DEFAULT,
            forgiving_parser=True,
            wait_for_results=True,
            job_retry=False,
            max_retries=sys.maxint,
            offload_failures_only=False,
            test_source_build=None,
            job_keyvals=None,
            test_args=None,
            child_dependencies=(),
            result_reporter=None,
    ):
        """
        Constructor

        The tests of the suite are discovered here, by running
        find_and_parse_tests() over |cf_getter| with the composition of
        |predicates|; everything else is forwarded to _BaseSuite.

        @param predicates: A list of callables that accept ControlData
                           representations of control files. A test will be
                           included in suite if all callables in this list
                           return True on the given control file.
        @param tag: a string with which to tag jobs run in this suite.
        @param builds: the builds on which we're running this suite.
        @param board: the board on which we're running this suite.
        @param cf_getter: a control_file_getter.ControlFileGetter
        @param run_prod_code: If true, the suite will run the test code that
                              lives in prod aka the test code currently on the
                              lab servers.
        @param afe: an instance of AFE as defined in server/frontend.py.
        @param tko: an instance of TKO as defined in server/frontend.py.
        @param pool: Specify the pool of machines to use for scheduling
                purposes.
        @param results_dir: The directory where the job can write results to.
                            This must be set if you want job_id of sub-jobs
                            list in the job keyvals.
        @param max_runtime_mins: Maximum suite runtime, in minutes.
        @param timeout_mins: Maximum job lifetime, in minutes.
        @param file_bugs: Forwarded unchanged to the _BaseSuite constructor.
        @param suite_job_id: Job id that will act as parent id to all sub jobs.
                             Default: None
        @param ignore_deps: True if jobs should ignore the DEPENDENCIES
                            attribute and skip applying of dependency labels.
                            (Default:False)
        @param extra_deps: A list of strings which are the extra DEPENDENCIES
                           to add to each test being scheduled.
        @param priority: Integer priority level.  Higher is more important.
        @param forgiving_parser: Forwarded unchanged to
                                 find_and_parse_tests().
        @param wait_for_results: Set to False to run the suite job without
                                 waiting for test jobs to finish. Default is
                                 True.
        @param job_retry: A bool value indicating whether jobs should be retried
                          on failure. If True, the field 'JOB_RETRIES' in
                          control files will be respected. If False, do not
                          retry.
        @param max_retries: Maximum retry limit at suite level.
                            Regardless how many times each individual test
                            has been retried, the total number of retries
                            happening in the suite can't exceed _max_retries.
                            Default to sys.maxint.
        @param offload_failures_only: Only enable gs_offloading for failed
                                      jobs.
        @param test_source_build: Build that contains the server-side test code.
        @param job_keyvals: General job keyvals to be inserted into keyval file,
                            which will be used by tko/parse later.
        @param test_args: A dict of args passed all the way to each individual
                          test that will be actually ran.
        @param child_dependencies: (optional) list of dependency strings
                to be added as dependencies to child jobs.
        @param result_reporter: A _ResultReporter instance to report results. If
                None, an _EmailReporter will be created.
        """
        # Module-level find_and_parse_tests (not the deprecated class
        # attribute): class attributes are not in scope inside a method.
        tests = find_and_parse_tests(
                cf_getter,
                _ComposedPredicate(predicates),
                tag,
                forgiving_parser=forgiving_parser,
                run_prod_code=run_prod_code,
                test_args=test_args,
        )
        super(Suite, self).__init__(
                tests=tests,
                tag=tag,
                builds=builds,
                board=board,
                afe=afe,
                tko=tko,
                pool=pool,
                results_dir=results_dir,
                max_runtime_mins=max_runtime_mins,
                timeout_mins=timeout_mins,
                file_bugs=file_bugs,
                suite_job_id=suite_job_id,
                ignore_deps=ignore_deps,
                extra_deps=extra_deps,
                priority=priority,
                wait_for_results=wait_for_results,
                job_retry=job_retry,
                max_retries=max_retries,
                offload_failures_only=offload_failures_only,
                test_source_build=test_source_build,
                job_keyvals=job_keyvals,
                child_dependencies=child_dependencies,
                result_reporter=result_reporter,
        )
Allen Li3b1d4e52017-03-09 16:23:06 -08001535
Allen Li4b5a24f2017-03-09 16:01:35 -08001536
class ProvisionSuite(_BaseSuite):
    """
    A suite for provisioning DUTs.

    This is done by creating dummy_Pass tests.
    """


    def __init__(
            self,
            tag,
            builds,
            board,
            devserver,
            num_required,
            num_max=float('inf'),
            cf_getter=None,
            run_prod_code=False,
            test_args=None,
            test_source_build=None,
            **kwargs):
        """
        Constructor

        @param tag: a string with which to tag jobs run in this suite.
        @param builds: the builds on which we're running this suite.
        @param board: the board on which we're running this suite.
        @param devserver: the devserver which contains the build.
        @param num_required: number of tests that must pass.  This is
                             capped by the number of tests that are run.
        @param num_max: max number of tests to make.  By default there
                        is no cap, a test is created for each eligible host.
        @param cf_getter: a control_file_getter.ControlFileGetter.
        @param run_prod_code: Forwarded to _load_dummy_test(), which uses it
                              to pick the control file source when
                              cf_getter is None.
        @param test_args: A dict of args passed all the way to each individual
                          test that will be actually ran.
        @param test_source_build: Build that contains the server-side test code.
        @param kwargs: Various keyword arguments passed to
                       _BaseSuite constructor.
        """
        super(ProvisionSuite, self).__init__(
                tests=[],
                tag=tag,
                builds=builds,
                board=board,
                **kwargs)
        # Defaults for the early-return path below: with no tests and
        # nothing required, _finished_waiting() is true immediately.
        self._num_successful = 0
        self._num_required = 0
        self.tests = []

        # Keep only the dependencies that provisioning does not act on.
        static_deps = [dep for dep in self._dependencies
                       if not provision.Provision.acts_on(dep)]
        if 'pool:suites' in static_deps:
            logging.info('Provision suite is disabled on suites pool')
            return
        logging.debug('Looking for hosts matching %r', static_deps)
        hosts = self._afe.get_hosts(
                invalid=False, multiple_labels=static_deps)
        logging.debug('Found %d matching hosts for ProvisionSuite', len(hosts))
        available_hosts = [h for h in hosts if h.is_available()]
        logging.debug('Found %d available hosts for ProvisionSuite',
                      len(available_hosts))
        dummy_test = _load_dummy_test(
                builds, devserver, cf_getter,
                run_prod_code, test_args, test_source_build)
        # One copy of the dummy test per available host, capped by num_max.
        self.tests = [dummy_test] * min(len(available_hosts), num_max)
        logging.debug('Made %d tests for ProvisionSuite', len(self.tests))
        self._num_required = min(num_required, len(self.tests))
        logging.debug('Expecting %d tests to pass for ProvisionSuite',
                      self._num_required)

    def _handle_result(self, result, record, waiter):
        """Delegate to _BaseSuite, then count the result if it is good.

        @param result: result object; only its is_good() is used here.
        @param record: forwarded unchanged to _BaseSuite._handle_result.
        @param waiter: forwarded unchanged to _BaseSuite._handle_result.
        """
        super(ProvisionSuite, self)._handle_result(result, record, waiter)
        if result.is_good():
            self._num_successful += 1

    def _finished_waiting(self):
        """Return True once enough tests have passed.

        @returns: True if the number of good results seen so far has
                  reached the required number.
        """
        return self._num_successful >= self._num_required
Allen Li44969e32017-05-24 16:47:37 -07001614
1615
def _load_dummy_test(
        builds,
        devserver,
        cf_getter=None,
        run_prod_code=False,
        test_args=None,
        test_source_build=None):
    """
    Load and return the dummy pass test.

    @param builds: the builds on which we're running this suite.
    @param devserver: the devserver which contains the build.
    @param cf_getter: a control_file_getter.ControlFileGetter.  When None,
                      one is created here: a file-system getter for
                      _AUTOTEST_DIR if run_prod_code is true, otherwise a
                      devserver getter for the test source build (after
                      staging its 'control_files' artifact).
    @param run_prod_code: selects the getter created when cf_getter is None,
                          and is forwarded to _ControlFileRetriever.
    @param test_args: A dict of args passed all the way to each individual
                      test that will be actually ran.
    @param test_source_build: Build that contains the server-side test code.
    @returns: whatever _ControlFileRetriever.retrieve_for_test() returns for
              'dummy_Pass'.
    """
    if cf_getter is None:
        if run_prod_code:
            cf_getter = create_fs_getter(_AUTOTEST_DIR)
        else:
            build = suite_common.get_test_source_build(
                    builds, test_source_build=test_source_build)
            # Stage the control files before a getter is created for them.
            devserver.stage_artifacts(image=build,
                                      artifacts=['control_files'])
            cf_getter = _create_ds_getter(build, devserver)
    retriever = _ControlFileRetriever(cf_getter,
                                      run_prod_code=run_prod_code,
                                      test_args=test_args)
    return retriever.retrieve_for_test('dummy_Pass')
Allen Li44969e32017-05-24 16:47:37 -07001646
1647
class _ComposedPredicate(object):
    """Return the composition of the predicates.

    Predicates are functions that take a test control data object and
    return True if that test is to be included.  The returned
    predicate's set is the intersection of all of the input predicates'
    sets (it returns True if all predicates return True).
    """

    def __init__(self, predicates):
        """Initialize instance.

        @param predicates: Iterable of predicates.
        """
        # Copy into a list so a one-shot iterable (e.g. a generator) can be
        # evaluated on every call, not just the first.
        self._predicates = list(predicates)

    def __repr__(self):
        return '{cls}({this._predicates!r})'.format(
            cls=type(self).__name__,
            this=self,
        )

    def __call__(self, control_data_):
        """Evaluate the composed predicate.

        @param control_data_: object handed to each predicate in turn.
        @returns: True if every predicate returns a true value (also True
                  when there are no predicates); False at the first
                  predicate that returns a false value.
        """
        return all(f(control_data_) for f in self._predicates)
1672
1673
def _is_nonexistent_board_error(e):
    """Return True if error is caused by nonexistent board label.

    As of this writing, the particular case we want looks like this:

     1) e.problem_keys is a dictionary
     2) e.problem_keys['meta_hosts'] exists as the only key
        in the dictionary.
     3) e.problem_keys['meta_hosts'] matches this pattern:
        "Label "board:.*" not found"

    We check for conditions 1) and 2) on the
    theory that they're relatively immutable.
    We don't check condition 3) because it seems
    likely to be a maintenance burden, and for the
    times when we're wrong, being right shouldn't
    matter enough (we _hope_).

    @param e: proxy.ValidationError instance
    @returns: boolean
    """
    return (isinstance(e.problem_keys, dict)
            and len(e.problem_keys) == 1
            and 'meta_hosts' in e.problem_keys)
Allen Liee36ab82017-07-07 15:46:40 -07001698
1699
class _ResultReporter(object):
    """Abstract base class for reporting test results.

    Usually, this is used to report test failures.
    """

    # Python 2 spelling of the metaclass declaration.
    __metaclass__ = abc.ABCMeta

    @abc.abstractmethod
    def report(self, result):
        """Report test result.

        @param result: Status instance for job.
        """
1714
1715
class _EmailReporter(_ResultReporter):
    """Class that emails based on test failures."""

    # TODO(akeshet): Document what |bug_template| is actually supposed to come
    # from, and rename it to something unrelated to "bugs" which are no longer
    # relevant now that this is purely an email sender.
    def __init__(self, suite, bug_template=None):
        """Initialize instance.

        @param suite: suite object whose _tko, _job_creator, _tag and
                      _jobs_to_tests attributes are read when reporting.
        @param bug_template: template dictionary used as the suite-level
                             default; an empty dict is used when this is
                             None or otherwise false.
        """
        self._suite = suite
        self._bug_template = bug_template or {}

    def _get_test_bug(self, result):
        """Get TestBug for the given result.

        @param result: Status instance for a test job.
        @returns: TestBug instance.
        """
        # reporting modules have dependency on external packages, e.g., httplib2
        # Such dependency can cause issue to any module tries to import suite.py
        # without building site-packages first. Since the reporting modules are
        # only used in this function, move the imports here avoid the
        # requirement of building site packages to use other functions in this
        # module.
        from autotest_lib.server.cros.dynamic_suite import reporting

        job_views = self._suite._tko.run('get_detailed_test_views',
                                         afe_job_id=result.id)
        return reporting.TestBug(self._suite._job_creator.cros_build,
                                 utils.get_chrome_version(job_views),
                                 self._suite._tag,
                                 result)

    def _get_bug_template(self, result):
        """Get the bug template to use for a test job.

        Starts from the suite-level template given to the constructor and
        tries to merge in the bug template of the test's control data.

        @param result: Status instance for job.
        @returns: the merged template on success; the suite-level template
                  if an AttributeError is raised (e.g. the test's control
                  data defines no bug template); an empty dict if merging
                  raises InvalidBugTemplateException.
        """
        # reporting modules have dependency on external packages, e.g., httplib2
        # Such dependency can cause issue to any module tries to import suite.py
        # without building site-packages first. Since the reporting modules are
        # only used in this function, move the imports here avoid the
        # requirement of building site packages to use other functions in this
        # module.
        from autotest_lib.server.cros.dynamic_suite import reporting_utils

        # Try to merge with bug template in test control file.
        template = reporting_utils.BugTemplate(self._bug_template)
        try:
            test_data = self._suite._jobs_to_tests[result.id]
            return template.finalize_bug_template(
                    test_data.bug_template)
        except AttributeError:
            # Test control file does not have bug template defined.
            return template.bug_template
        except reporting_utils.InvalidBugTemplateException as e:
            logging.error('Merging bug templates failed with '
                          'error: %s An empty bug template will '
                          'be used.', e)
            return {}

    def report(self, result):
        """Send an email for the given result.

        @param result: Status instance for job.
        """
        # reporting modules have dependency on external
        # packages, e.g., httplib2 Such dependency can cause
        # issue to any module tries to import suite.py without
        # building site-packages first. Since the reporting
        # modules are only used in this function, move the
        # imports here avoid the requirement of building site
        # packages to use other functions in this module.
        from autotest_lib.server.cros.dynamic_suite import reporting

        reporting.send_email(
                self._get_test_bug(result),
                self._get_bug_template(result))