blob: fc90bb7f7d6c39e974d268f4882eab26e98f86d2 [file] [log] [blame]
Chris Masone44e4d6c2012-08-15 14:25:53 -07001# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
Fang Deng443f1952015-01-02 14:51:49 -08005import datetime
6import difflib
7import hashlib
8import logging
9import operator
10import os
11import re
Fang Deng443f1952015-01-02 14:51:49 -080012import sys
Chris Masone44e4d6c2012-08-15 14:25:53 -070013
14import common
15
J. Richard Barnetteb592fbc2014-04-02 10:27:33 -070016from autotest_lib.frontend.afe.json_rpc import proxy
Alex Miller3a69adc2012-12-19 13:38:31 -080017from autotest_lib.client.common_lib import control_data
Fang Denge3bc24b2014-03-17 15:19:46 -070018from autotest_lib.client.common_lib import enum
Dan Shidfea3682014-08-10 23:38:40 -070019from autotest_lib.client.common_lib import error
Simran Basi5ace6f22016-01-06 17:30:44 -080020from autotest_lib.client.common_lib import global_config
Alex Miller7d658cf2013-09-04 16:00:35 -070021from autotest_lib.client.common_lib import priorities
Dan Shidfea3682014-08-10 23:38:40 -070022from autotest_lib.client.common_lib import site_utils
23from autotest_lib.client.common_lib import time_utils
24from autotest_lib.client.common_lib import utils
Fang Denge3bc24b2014-03-17 15:19:46 -070025from autotest_lib.frontend.afe.json_rpc import proxy
Dan Shi36cfd832014-10-10 13:38:51 -070026from autotest_lib.server.cros import provision
Chris Masone44e4d6c2012-08-15 14:25:53 -070027from autotest_lib.server.cros.dynamic_suite import constants
28from autotest_lib.server.cros.dynamic_suite import control_file_getter
29from autotest_lib.server.cros.dynamic_suite import frontend_wrappers
Alex Miller3a69adc2012-12-19 13:38:31 -080030from autotest_lib.server.cros.dynamic_suite import job_status
J. Richard Barnettee7b98bb2013-08-21 16:34:16 -070031from autotest_lib.server.cros.dynamic_suite import tools
32from autotest_lib.server.cros.dynamic_suite.job_status import Status
Chris Masone44e4d6c2012-08-15 14:25:53 -070033
Shuqian Zhaoab468812015-04-08 14:40:38 -070034try:
35 from chromite.lib import boolparse_lib
36 from chromite.lib import cros_logging as logging
37except ImportError:
38 print 'Unable to import chromite.'
39 print 'This script must be either:'
40 print ' - Be run in the chroot.'
41 print ' - (not yet supported) be run after running '
42 print ' ../utils/build_externals.py'
Fang Denge3bc24b2014-03-17 15:19:46 -070043
# Names of suites singled out for bug filing.
# NOTE(review): the code that consumes this list is outside this view --
# presumably failures in these suites get bugs filed; confirm at the caller.
_FILE_BUG_SUITES = ['au', 'bvt', 'bvt-cq', 'bvt-inline', 'paygen_au_beta',
                    'paygen_au_canary', 'paygen_au_dev', 'paygen_au_stable',
                    'sanity', 'push_to_prod']
# Value of 'drone_installation_directory' in the SCHEDULER section of the
# global config. Suite.create_from_predicates() uses it as the root for the
# file-system control file getter when run_prod_code is set.
_AUTOTEST_DIR = global_config.global_config.get_config_value(
        'SCHEDULER', 'drone_installation_directory')
# Boolean 'enable_getting_controls_in_batch' flag from the CROS section of the
# global config; False when the option is absent.
# NOTE(review): its consumer is not visible here -- presumably it switches
# control file retrieval to a batched mode; confirm.
ENABLE_CONTROLS_IN_BATCH = global_config.global_config.get_config_value(
        'CROS', 'enable_getting_controls_in_batch', type=bool, default=False)
Shuqian Zhaoe33ba4a2015-09-11 18:51:43 -070051
class RetryHandler(object):
    """Track which suite jobs were retried and how many retries remain.

    @var _retry_map: Retry history, keyed by afe job id. Each value is a
            dictionary {'state': RetryHandler.States, 'retry_max': int}.
            - state: the retry state of the job.
                NOT_ATTEMPTED: nothing has been done about the job yet.
                ATTEMPTED: an attempt was made to schedule a retry job.
                    The attempt itself may have failed, e.g. on an rpc
                    error; that is different from the retry job failing.
                    A retry is attempted at most once per job, so a
                    scheduling failure ends the retry chain. For example,
                    with JOB_RETRIES=5, if the second retry job fails and
                    scheduling the third one hits an rpc error, no further
                    retries are made.
                RETRIED: a retry job was scheduled successfully.
            - retry_max: how many more times the job may be retried,
                already accounting for the retries that have happened.
    @var _retry_level: A retry may only be triggered when the result is
            worse than this level.
    @var _max_retries: Remaining retry budget for the whole suite. No
            matter how often individual tests were retried, the total
            number of retries in the suite cannot exceed it.
    """

    States = enum.Enum('NOT_ATTEMPTED', 'ATTEMPTED', 'RETRIED',
                       start_value=1, step=1)

    def __init__(self, initial_jobs_to_tests, retry_level='WARN',
                 max_retries=None):
        """Set up retry bookkeeping for the jobs a suite scheduled.

        Only jobs whose test has job_retries > 0 get a retry map entry.

        @param initial_jobs_to_tests: A dictionary mapping a job id to a
                ControlData object, for the jobs originally scheduled by
                the suite.
        @param retry_level: A retry may only be triggered when the result
                is worse than this level.
        @param max_retries: Integer, maximum total number of retries
                allowed for the suite. None (the default) means no limit.
        """
        self._retry_map = {}
        self._retry_level = retry_level
        if max_retries is None:
            # No suite-level cap requested: use the largest plain int.
            max_retries = sys.maxint
        self._max_retries = max_retries
        for job_id, test in initial_jobs_to_tests.items():
            if test.job_retries > 0:
                self._add_job(new_job_id=job_id, retry_max=test.job_retries)


    def _add_job(self, new_job_id, retry_max):
        """Register a newly created job in the retry map.

        @param new_job_id: The afe_job_id of the newly created job.
        @param retry_max: How many times the test may be retried if this
                job fails.

        @raises ValueError if new_job_id is already in the retry map.

        """
        if new_job_id in self._retry_map:
            raise ValueError('add_job called when job is already in retry map.')

        record = {'state': self.States.NOT_ATTEMPTED,
                  'retry_max': retry_max}
        self._retry_map[new_job_id] = record


    def _suite_max_reached(self):
        """Return whether the suite-level retry budget is used up."""
        remaining = self._max_retries
        return remaining <= 0


    def add_retry(self, old_job_id, new_job_id):
        """Record that |new_job_id| was scheduled as a retry of |old_job_id|.

        Marks the old job as RETRIED, registers the new job with one retry
        fewer than the old one had, and consumes one unit of the suite-level
        retry budget.

        @param old_job_id: The afe_job_id of the job that is retried.
        @param new_job_id: The afe_job_id of the retry job.

        @raises KeyError if old_job_id isn't in the retry map.
        @raises ValueError if the old job was already retried, or a retry
                was already attempted for it.

        """
        retried_record = self._retry_map[old_job_id]
        if retried_record['state'] != self.States.NOT_ATTEMPTED:
            raise ValueError(
                    'We have already retried or attempted to retry job %d' %
                    old_job_id)
        retried_record['state'] = self.States.RETRIED
        self._add_job(new_job_id=new_job_id,
                      retry_max=retried_record['retry_max'] - 1)
        self._max_retries -= 1


    def set_attempted(self, job_id):
        """Move the job from NOT_ATTEMPTED to ATTEMPTED.

        @param job_id: afe_job_id of a job.

        @raises KeyError if job_id isn't in the retry map.
        @raises ValueError if the current state is not NOT_ATTEMPTED.

        """
        record = self._retry_map[job_id]
        current_state = record['state']
        if current_state != self.States.NOT_ATTEMPTED:
            # Each job is retried, or has a retry attempted, at most once;
            # any other transition is a caller error.
            raise ValueError('Unexpected state transition: %s -> %s' %
                             (self.States.get_string(current_state),
                              self.States.get_string(self.States.ATTEMPTED)))
        self._retry_map[job_id]['state'] = self.States.ATTEMPTED


    def has_following_retry(self, result):
        """Tell whether a retry follows, or will follow, the given result.

        For the job identified by result.id:
          - no retry map entry -> no retry was requested, no following retry
          - retry map entry present:
              - already retried -> there is a following retry
              - not retried yet (delegated to _should_retry(result)):
                  - retry_max == 0 -> this was the last retry job
                  - retry_max > 0:
                      - attempted, but scheduling the retry failed on an
                        rpc error -> no more retries
                      - not attempted -> a retry follows if the test failed

        @param result: A result, encapsulating the status of the job.

        @returns: True, if there will be a following retry.
                  False otherwise.

        """
        # Kept as one short-circuit expression: each clause is only safe to
        # evaluate when the clauses before it held.
        return (result.test_executed
                and result.id in self._retry_map
                and (self.States.RETRIED == self._retry_map[result.id]['state']
                     or self._should_retry(result)))


    def _should_retry(self, result):
        """Decide from a job's result whether it should be retried.

        Only meaningful when called by has_following_retry().

        The job belonging to |result| is retried when all of these hold:
          a) The test was actually executed; a job aborted before it ever
             reached 'Running' is not retried.
          b) The result is worse than |self._retry_level|, which defaults
             to 'WARN'.
          c) The test asked for retries, i.e. the job has an entry in the
             retry map.
          d) No retry attempt has been made yet, i.e. state is
             NOT_ATTEMPTED. If a test has JOB_RETRIES=5 and scheduling one
             of its retries hit an rpc error, all later retries are given up.
          e) The job still has retries left, i.e. retry_max > 0.
        In addition the suite-level retry budget must not be exhausted.

        @param result: A result, encapsulating the status of the job.

        @returns: True if we should retry the job.

        """
        assert result.test_executed
        assert result.id in self._retry_map
        return (
            not self._suite_max_reached()
            and result.is_worse_than(
                    job_status.Status(self._retry_level, '', 'reason'))
            and self.States.NOT_ATTEMPTED == self._retry_map[result.id]['state']
            and self._retry_map[result.id]['retry_max'] > 0
        )


    def get_retry_max(self, job_id):
        """Return how many more times the job may be retried.

        @param job_id: afe_job_id of a job.

        @returns: An int, the number of retries still allowed for the job.
        @raises KeyError if job_id isn't in the retry map.

        """
        record = self._retry_map[job_id]
        return record['retry_max']
253
254
Chris Masone44e4d6c2012-08-15 14:25:53 -0700255class Suite(object):
256 """
257 A suite of tests, defined by some predicate over control file variables.
258
259 Given a place to search for control files a predicate to match the desired
260 tests, can gather tests and fire off jobs to run them, and then wait for
261 results.
262
263 @var _predicate: a function that should return True when run over a
264 ControlData representation of a control file that should be in
265 this Suite.
266 @var _tag: a string with which to tag jobs run in this suite.
Dan Shi36cfd832014-10-10 13:38:51 -0700267 @var _builds: the builds on which we're running this suite.
Chris Masone44e4d6c2012-08-15 14:25:53 -0700268 @var _afe: an instance of AFE as defined in server/frontend.py.
269 @var _tko: an instance of TKO as defined in server/frontend.py.
270 @var _jobs: currently scheduled jobs, if any.
Fang Denge3bc24b2014-03-17 15:19:46 -0700271 @var _jobs_to_tests: a dictionary that maps job ids to tests represented
272 ControlData objects.
Chris Masone44e4d6c2012-08-15 14:25:53 -0700273 @var _cf_getter: a control_file_getter.ControlFileGetter
Fang Denge3bc24b2014-03-17 15:19:46 -0700274 @var _retry: a bool value indicating whether jobs should be retried on
275 failure.
276 @var _retry_handler: a RetryHandler object.
277
Chris Masone44e4d6c2012-08-15 14:25:53 -0700278 """
279
280
@staticmethod
def _create_ds_getter(build, devserver):
    """Create a control file getter backed by a devserver.

    @param build: the build on which we're running this suite.
    @param devserver: the devserver which contains the build.
    @return a DevServerGetter instance for |build| on |devserver|.
    """
    getter = control_file_getter.DevServerGetter(build, devserver)
    return getter
Chris Masone44e4d6c2012-08-15 14:25:53 -0700289
290
@staticmethod
def create_fs_getter(autotest_dir):
    """Create a control file getter that reads from the local file system.

    @param autotest_dir: the place to find autotests.
    @return a FileSystemGetter instance that looks under |autotest_dir|.
    """
    # currently hard-coded places to look for tests.
    directories = []
    for subpath in ('server/site_tests', 'client/site_tests',
                    'server/tests', 'client/tests'):
        directories.append(os.path.join(autotest_dir, subpath))
    return control_file_getter.FileSystemGetter(directories)
302
303
304 @staticmethod
Allen Lif20e17d2017-01-03 18:24:19 -0800305 def name_in_tag_predicate(name):
Chris Masone44e4d6c2012-08-15 14:25:53 -0700306 """Returns predicate that takes a control file and looks for |name|.
307
308 Builds a predicate that takes in a parsed control file (a ControlData)
309 and returns True if the SUITE tag is present and contains |name|.
310
311 @param name: the suite name to base the predicate on.
312 @return a callable that takes a ControlData and looks for |name| in that
313 ControlData object's suite member.
314 """
Allen Li30833702017-01-03 18:34:15 -0800315 return lambda t: name in t.suite_tag_parts
Dan Shi5783f8a2014-12-22 14:34:45 -0800316
317
Allen Lif20e17d2017-01-03 18:24:19 -0800318 @staticmethod
319 def name_in_tag_similarity_predicate(name):
Dan Shi5783f8a2014-12-22 14:34:45 -0800320 """Returns predicate that takes a control file and gets the similarity
321 of the suites in the control file and the given name.
322
323 Builds a predicate that takes in a parsed control file (a ControlData)
324 and returns a list of tuples of (suite name, ratio), where suite name
325 is each suite listed in the control file, and ratio is the similarity
326 between each suite and the given name.
327
328 @param name: the suite name to base the predicate on.
329 @return a callable that takes a ControlData and returns a list of tuples
330 of (suite name, ratio), where suite name is each suite listed in
331 the control file, and ratio is the similarity between each suite
332 and the given name.
333 """
Allen Li30833702017-01-03 18:34:15 -0800334 return lambda t: [(suite,
335 difflib.SequenceMatcher(a=suite, b=name).ratio())
336 for suite in t.suite_tag_parts] or [(None, 0)]
Chris Masone44e4d6c2012-08-15 14:25:53 -0700337
338
339 @staticmethod
Aviv Keshet40222a42013-06-04 16:25:49 -0700340 def test_name_equals_predicate(test_name):
341 """Returns predicate that matched based on a test's name.
342
343 Builds a predicate that takes in a parsed control file (a ControlData)
344 and returns True if the test name is equal to |test_name|.
345
346 @param test_name: the test name to base the predicate on.
347 @return a callable that takes a ControlData and looks for |test_name|
348 in that ControlData's name.
349 """
350 return lambda t: hasattr(t, 'name') and test_name == t.name
351
352
353 @staticmethod
Aviv Kesheta6adc7a2013-08-30 11:13:38 -0700354 def test_name_matches_pattern_predicate(test_name_pattern):
355 """Returns predicate that matches based on a test's name pattern.
356
357 Builds a predicate that takes in a parsed control file (a ControlData)
358 and returns True if the test name matches the given regular expression.
359
360 @param test_name_pattern: regular expression (string) to match against
361 test names.
362 @return a callable that takes a ControlData and returns
363 True if the name fields matches the pattern.
364 """
365 return lambda t: hasattr(t, 'name') and re.match(test_name_pattern,
366 t.name)
367
368
369 @staticmethod
370 def test_file_matches_pattern_predicate(test_file_pattern):
371 """Returns predicate that matches based on a test's file name pattern.
372
373 Builds a predicate that takes in a parsed control file (a ControlData)
374 and returns True if the test's control file name matches the given
375 regular expression.
376
377 @param test_file_pattern: regular expression (string) to match against
378 control file names.
379 @return a callable that takes a ControlData and and returns
380 True if control file name matches the pattern.
381 """
382 return lambda t: hasattr(t, 'path') and re.match(test_file_pattern,
383 t.path)
384
385
@staticmethod
def matches_attribute_expression_predicate(test_attr_boolstr):
    """Build a predicate that evaluates a boolean expression of attributes.

    The returned predicate takes a parsed control file (a ControlData) and
    returns the result of evaluating |test_attr_boolstr| against the test's
    attributes with boolparse_lib.BoolstrResult.

    @param test_attr_boolstr: boolean expression of the attributes to be
                              tested, like 'system:all and interval:daily'.

    @return a callable that takes a ControlData and returns True if the test
            attributes satisfy the given boolean expression.
    """
    def _attributes_satisfy_expression(control_data):
        return boolparse_lib.BoolstrResult(
                test_attr_boolstr, control_data.attributes)

    return _attributes_satisfy_expression
403
404 @staticmethod
Dan Shi5783f8a2014-12-22 14:34:45 -0800405 def test_name_similarity_predicate(test_name):
406 """Returns predicate that matched based on a test's name.
407
408 Builds a predicate that takes in a parsed control file (a ControlData)
409 and returns a tuple of (test name, ratio), where ratio is the similarity
410 between the test name and the given test_name.
411
412 @param test_name: the test name to base the predicate on.
413 @return a callable that takes a ControlData and returns a tuple of
414 (test name, ratio), where ratio is the similarity between the
415 test name and the given test_name.
416 """
417 return lambda t: ((None, 0) if not hasattr(t, 'name') else
418 (t.name,
419 difflib.SequenceMatcher(a=t.name, b=test_name).ratio()))
420
421
422 @staticmethod
423 def test_file_similarity_predicate(test_file_pattern):
424 """Returns predicate that gets the similarity based on a test's file
425 name pattern.
426
427 Builds a predicate that takes in a parsed control file (a ControlData)
428 and returns a tuple of (file path, ratio), where ratio is the
429 similarity between the test file name and the given test_file_pattern.
430
431 @param test_file_pattern: regular expression (string) to match against
432 control file names.
433 @return a callable that takes a ControlData and and returns a tuple of
434 (file path, ratio), where ratio is the similarity between the
435 test file name and the given test_file_pattern.
436 """
437 return lambda t: ((None, 0) if not hasattr(t, 'path') else
438 (t.path, difflib.SequenceMatcher(a=t.path,
439 b=test_file_pattern).ratio()))
440
441
@classmethod
def list_all_suites(cls, build, devserver, cf_getter=None):
    """Collect every suite name mentioned by any control file.

    Parses all control files available through the getter, experimental
    ones included, and gathers the suite names from their suite tags.

    @param build: the build on which we're running this suite.
    @param devserver: the devserver which contains the build.
    @param cf_getter: control_file_getter.ControlFileGetter. Defaults to
                      using DevServerGetter.

    @return list of suites
    """
    if cf_getter is None:
        cf_getter = cls._create_ds_getter(build, devserver)

    def _accept_every_test(control_data):
        return True

    all_tests = cls.find_and_parse_tests(cf_getter, _accept_every_test,
                                         add_experimental=True)
    suite_names = {suite
                   for control_data in all_tests
                   for suite in control_data.suite_tag_parts}
    return list(suite_names)
464
465
466 @staticmethod
Dan Shi36cfd832014-10-10 13:38:51 -0700467 def get_test_source_build(builds, **dargs):
468 """Get the build of test code.
469
470 Get the test source build from arguments. If parameter
471 `test_source_build` is set and has a value, return its value. Otherwise
472 returns the ChromeOS build name if it exists. If ChromeOS build is not
473 specified either, raise SuiteArgumentException.
474
475 @param builds: the builds on which we're running this suite. It's a
476 dictionary of version_prefix:build.
477 @param **dargs: Any other Suite constructor parameters, as described
478 in Suite.__init__ docstring.
479
480 @return: The build contains the test code.
481 @raise: SuiteArgumentException if both test_source_build and ChromeOS
482 build are not specified.
483
484 """
485 if dargs.get('test_source_build', None):
486 return dargs['test_source_build']
487 test_source_build = builds.get(provision.CROS_VERSION_PREFIX, None)
488 if not test_source_build:
489 raise error.SuiteArgumentException(
490 'test_source_build must be specified if CrOS build is not '
491 'specified.')
492 return test_source_build
493
494
@classmethod
def create_from_predicates(cls, predicates, builds, board, devserver,
                           cf_getter=None, name='ad_hoc_suite',
                           run_prod_code=False, **dargs):
    """
    Create a Suite from a list of predicate test filters.

    When no |cf_getter| is supplied, control files come from the local
    autotest installation if |run_prod_code| is set, and otherwise from
    |devserver| for the test source build derived from |builds| and
    |dargs|.

    @param predicates: A list of callables that accept ControlData
                       representations of control files. A test will be
                       included in suite if all callables in this list
                       return True on the given control file.
    @param builds: the builds on which we're running this suite. It's a
                   dictionary of version_prefix:build.
    @param board: the board on which we're running this suite.
    @param devserver: the devserver which contains the build.
    @param cf_getter: control_file_getter.ControlFileGetter. Defaults to
                      using DevServerGetter.
    @param name: name of suite. Defaults to 'ad_hoc_suite'
    @param run_prod_code: If true, the suite will run the tests that
                          live in prod aka the test code currently on the
                          lab servers.
    @param **dargs: Any other Suite constructor parameters, as described
                    in Suite.__init__ docstring.
    @return a Suite instance.
    """
    getter_missing = cf_getter is None
    if getter_missing and run_prod_code:
        cf_getter = cls.create_fs_getter(_AUTOTEST_DIR)
    elif getter_missing:
        source_build = cls.get_test_source_build(builds, **dargs)
        cf_getter = cls._create_ds_getter(source_build, devserver)

    return cls(predicates, name, builds, board, cf_getter, run_prod_code,
               **dargs)
Aviv Keshet69ebb6c2013-06-11 13:58:44 -0700534
535
@classmethod
def create_from_name(cls, name, builds, board, devserver, cf_getter=None,
                     **dargs):
    """
    Create a Suite of the tests whose SUITE control file var lists |name|.

    Builds a name-in-suite-tag predicate from |name| and instantiates a
    Suite with it, also using |name| as the suite's tag. When no
    |cf_getter| is supplied, control files are pulled from |devserver|
    for the test source build derived from |builds| and |dargs|.

    @param name: a value of the SUITE control file variable to search for.
    @param builds: the builds on which we're running this suite. It's a
                   dictionary of version_prefix:build.
    @param board: the board on which we're running this suite.
    @param devserver: the devserver which contains the build.
    @param cf_getter: control_file_getter.ControlFileGetter. Defaults to
                      using DevServerGetter.
    @param **dargs: Any other Suite constructor parameters, as described
                    in Suite.__init__ docstring.
    @return a Suite instance.
    """
    if cf_getter is None:
        source_build = cls.get_test_source_build(builds, **dargs)
        cf_getter = cls._create_ds_getter(source_build, devserver)

    suite_predicates = [cls.name_in_tag_predicate(name)]
    return cls(suite_predicates, name, builds, board, cf_getter, **dargs)
Chris Masone44e4d6c2012-08-15 14:25:53 -0700564
565
def __init__(
        self,
        predicates,
        tag,
        builds,
        board,
        cf_getter,
        run_prod_code=False,
        afe=None,
        tko=None,
        pool=None,
        results_dir=None,
        max_runtime_mins=24*60,
        timeout_mins=24*60,
        file_bugs=False,
        file_experimental_bugs=False,
        suite_job_id=None,
        ignore_deps=False,
        extra_deps=None,
        priority=priorities.Priority.DEFAULT,
        forgiving_parser=True,
        wait_for_results=True,
        job_retry=False,
        max_retries=sys.maxint,
        offload_failures_only=False,
        test_source_build=None,
        job_keyvals=None
):
    """
    Constructor

    Stores the suite configuration and immediately discovers the suite's
    tests through |cf_getter|, keeping them in self.tests.

    @param predicates: A list of callables that accept ControlData
                       representations of control files. A test will be
                       included in suite if all callables in this list
                       return True on the given control file.
    @param tag: a string with which to tag jobs run in this suite.
    @param builds: the builds on which we're running this suite.
    @param board: the board on which we're running this suite.
    @param cf_getter: a control_file_getter.ControlFileGetter
    @param run_prod_code: If true, the suite will run the test code that
                          lives in prod aka the test code currently on the
                          lab servers.
    @param afe: an instance of AFE as defined in server/frontend.py.
                A RetryingAFE is created when none is given.
    @param tko: an instance of TKO as defined in server/frontend.py.
                A RetryingTKO is created when none is given.
    @param pool: Specify the pool of machines to use for scheduling
                 purposes.
    @param results_dir: The directory where the job can write results to.
                        This must be set if you want job_id of sub-jobs
                        list in the job keyvals.
    @param max_runtime_mins: Maximum suite runtime, in minutes.
    @param timeout_mins: Maximum job lifetime, in minutes.
    @param file_bugs: Stored as-is for later use.
                      NOTE(review): presumably enables bug filing for
                      failed tests; the consumer is outside this view.
    @param file_experimental_bugs: Stored as-is for later use.
                      NOTE(review): presumably extends bug filing to
                      experimental tests; confirm at the consumer.
    @param suite_job_id: Job id that will act as parent id to all sub jobs.
                         Default: None
    @param ignore_deps: True if jobs should ignore the DEPENDENCIES
                        attribute and skip applying of dependency labels.
                        (Default:False)
    @param extra_deps: A list of strings which are the extra DEPENDENCIES
                       to add to each test being scheduled. None is
                       treated as an empty list.
    @param priority: Integer priority level.  Higher is more important.
    @param forgiving_parser: Passed through to find_and_parse_tests().
                      NOTE(review): presumably makes control file parse
                      errors non-fatal; confirm in find_and_parse_tests.
    @param wait_for_results: Set to False to run the suite job without
                             waiting for test jobs to finish. Default is
                             True.
    @param job_retry: A bool value indicating whether jobs should be retried
                      on failure. If True, the field 'JOB_RETRIES' in
                      control files will be respected. If False, do not
                      retry.
    @param max_retries: Maximum retry limit at suite level.
                        Regardless how many times each individual test
                        has been retried, the total number of retries
                        happening in the suite can't exceed _max_retries.
                        Default to sys.maxint.
    @param offload_failures_only: Only enable gs_offloading for failed
                                  jobs.
    @param test_source_build: Build that contains the server-side test code.
    @param job_keyvals: General job keyvals to be inserted into keyval file,
                        which will be used by tko/parse later.

    """
    def _matches_all_predicates(control_data):
        """Return True when every predicate accepts |control_data|."""
        return all(f(control_data) for f in predicates)

    self._tag = tag
    self._builds = builds
    self._board = board
    self._cf_getter = cf_getter
    self._results_dir = results_dir
    if not afe:
        afe = frontend_wrappers.RetryingAFE(timeout_min=30,
                                            delay_sec=10,
                                            debug=False)
    self._afe = afe
    if not tko:
        tko = frontend_wrappers.RetryingTKO(timeout_min=30,
                                            delay_sec=10,
                                            debug=False)
    self._tko = tko
    self._pool = pool
    self._jobs = []
    self._jobs_to_tests = {}
    # Test discovery happens here, at construction time.
    self.tests = self.find_and_parse_tests(
            self._cf_getter,
            _matches_all_predicates,
            self._tag,
            add_experimental=True,
            forgiving_parser=forgiving_parser,
            run_prod_code=run_prod_code,
    )

    self._max_runtime_mins = max_runtime_mins
    self._timeout_mins = timeout_mins
    self._file_bugs = file_bugs
    self._file_experimental_bugs = file_experimental_bugs
    self._suite_job_id = suite_job_id
    self._ignore_deps = ignore_deps
    # A fresh list per instance when the caller supplied none.
    self._extra_deps = [] if extra_deps is None else extra_deps
    self._priority = priority
    self._job_retry = job_retry
    self._max_retries = max_retries
    # RetryHandler to be initialized in schedule()
    self._retry_handler = None
    self.wait_for_results = wait_for_results
    self._offload_failures_only = offload_failures_only
    self._test_source_build = test_source_build
    self._job_keyvals = job_keyvals
Alex Millera3a4fe72013-01-22 09:57:47 -0800686
Chris Masone44e4d6c2012-08-15 14:25:53 -0700687
@property
def _cros_build(self):
    """Return the CrOS build, or the first build in the builds dict.

    The build stored under provision.CROS_VERSION_PREFIX is preferred.
    When the builds dict has no such key, the first of its values is
    returned instead.

    @returns: A build taken from |self._builds|.
    """
    # TODO(ayatane): Note that the builds dict isn't ordered, so the
    # fallback entry is arbitrary. I'm not sure what the implications of
    # this are, but it's probably not a good thing.
    builds = self._builds
    fallback_build = builds.values()[0]
    return builds.get(provision.CROS_VERSION_PREFIX, fallback_build)
696
697
def _create_job(self, test, retry_for=None):
    """
    Thin wrapper around frontend.AFE.create_job().

    Collects every create_job() argument for |test| from the suite's
    configuration and from the control data, issues the RPC through
    |self._afe|, and tags the returned job with the test's name.

    @param test: ControlData object for a test to run.
    @param retry_for: If the to-be-created job is a retry for an
                      old job, the afe_job_id of the old job will
                      be passed in as |retry_for|, which will be
                      recorded in the new job's keyvals.
    @returns: A frontend.Job object with an added test_name member.
              test_name is used to preserve the higher level TEST_NAME
              name of the job.
    """
    # The job is named after the build providing the test code when one
    # was given, otherwise after the CrOS build.
    job_args = {
        'control_file': test.text,
        'name': tools.create_job_name(
                self._test_source_build or self._cros_build,
                self._tag,
                test.name),
        'control_type': test.test_type.capitalize(),
        # One meta host (the board label) per machine the test needs.
        'meta_hosts': [self._board] * test.sync_count,
        'dependencies': self._create_job_deps(test),
        'keyvals': self._create_keyvals_for_test_job(test, retry_for),
        'max_runtime_mins': self._max_runtime_mins,
        'timeout_mins': self._timeout_mins,
        'parent_job_id': self._suite_job_id,
        'test_retry': test.retries,
        'priority': self._priority,
        'synch_count': test.sync_count,
        'require_ssp': test.require_ssp,
    }
    new_job = self._afe.create_job(**job_args)
    new_job.test_name = test.name
    return new_job
731
732
def _create_job_deps(self, test):
    """Create job deps list for a test job.

    The list holds the test's own dependencies (unless the suite was told
    to ignore them), then the suite's extra dependencies, then the pool
    (when one is set), and finally the board.

    @param test: ControlData object for a test to run.
    @returns: A list of dependency strings.
    """
    deps = []
    if not self._ignore_deps:
        deps.extend(test.dependencies)
    deps.extend(self._extra_deps)
    # The board is always required; the pool only when one is configured.
    deps.extend([self._pool, self._board] if self._pool else [self._board])
    return deps
747
748
def _create_keyvals_for_test_job(self, test, retry_for=None):
    """Create keyvals dict for creating a test job.

    @param test: ControlData object for a test to run.
    @param retry_for: If the to-be-created job is a retry for an
                      old job, the afe_job_id of the old job will
                      be passed in as |retry_for|, which will be
                      recorded in the new job's keyvals.
    @returns: A keyvals dict for creating the test job.
    """
    cros_build = self._cros_build
    source_build = self._test_source_build
    builds = self._builds

    keyvals = {}
    keyvals[constants.JOB_BUILD_KEY] = cros_build
    keyvals[constants.JOB_SUITE_KEY] = self._tag
    keyvals[constants.JOB_EXPERIMENTAL_KEY] = test.experimental
    keyvals[constants.JOB_BUILDS_KEY] = builds

    # test_source_build is saved to job_keyvals so scheduler can retrieve
    # the build name from database when compiling autoserv commandline.
    # This avoids a database change to add a new field in afe_jobs.
    #
    # Only add `test_source_build` to job keyvals if the build is different
    # from the CrOS build or the job uses more than one build, e.g., both
    # firmware and CrOS will be updated in the dut.
    # This is for backwards compatibility, so the updated Autotest code can
    # compile an autoserv command line to run in a SSP container using
    # previous builds.
    uses_other_source = (cros_build != source_build or len(builds) > 1)
    if source_build and uses_other_source:
        keyvals[constants.JOB_TEST_SOURCE_BUILD_KEY] = source_build

    # Copy the firmware builds, when present, under their own keyvals.
    firmware_keyvals = (
        (provision.FW_RW_VERSION_PREFIX, constants.FWRW_BUILD),
        (provision.FW_RO_VERSION_PREFIX, constants.FWRO_BUILD),
    )
    for prefix, keyval_name in firmware_keyvals:
        if prefix in builds:
            keyvals[keyval_name] = builds[prefix]

    # Add suite job id to keyvals so tko parser can read it from keyval
    # file.
    if self._suite_job_id:
        keyvals[constants.PARENT_JOB_ID] = self._suite_job_id

    # We drop the old job's id in the new job's keyval file so that
    # later our tko parser can figure out the retry relationship and
    # invalidate the results of the old job in tko database.
    if retry_for:
        keyvals[constants.RETRY_ORIGINAL_JOB_ID] = retry_for

    if self._offload_failures_only:
        keyvals[constants.JOB_OFFLOAD_FAILURES_KEY] = True

    return keyvals
Chris Masone44e4d6c2012-08-15 14:25:53 -0700797
798
def _schedule_test(self, record, test, retry_for=None, ignore_errors=False):
    """Schedule a single test and return the job.

    Schedule a single test by creating a job, and then update relevant
    data structures that are used to keep track of all running jobs.

    Emits a TEST_NA status log entry if it failed to schedule the test due
    to NoEligibleHostException or a non-existent board label.

    Returns a frontend.Job object if the test is successfully scheduled.
    If scheduling failed due to NoEligibleHostException or a non-existent
    board label, returns None.  If ignore_errors is True, rpc errors
    (error.RPCException, proxy.JSONRPCException) are logged and None is
    returned, otherwise they are re-raised as-is, with their original
    traceback.

    @param record: A callable to use for logging.
                   prototype: record(base_job.status_log_entry)
    @param test: ControlData for a test to run.
    @param retry_for: If we are scheduling a test to retry an
                      old job, the afe_job_id of the old job
                      will be passed in as |retry_for|.
    @param ignore_errors: If True, when an rpc error occur, ignore
                          the error and will return None.
                          If False, rpc errors will be raised.

    @returns: A frontend.Job object or None
    """
    msg = 'Scheduling %s' % test.name
    if retry_for:
        msg = msg + ', to retry afe job %d' % retry_for
    logging.debug(msg)
    begin_time_str = datetime.datetime.now().strftime(time_utils.TIME_FMT)
    try:
        job = self._create_job(test, retry_for=retry_for)
    except (error.NoEligibleHostException, proxy.ValidationError) as e:
        # Anything caught here that is not a NoEligibleHostException is a
        # ValidationError, so only the board check remains to be done.
        not_applicable = (isinstance(e, error.NoEligibleHostException)
                          or _is_nonexistent_board_error(e))
        if not not_applicable:
            # Bare raise keeps the traceback of the original failure.
            raise
        # Treat a dependency on a non-existent board label the same as
        # a dependency on a board that exists, but for which there's no
        # hardware.
        logging.debug('%s not applicable for this board/pool. '
                      'Emitting TEST_NA.', test.name)
        Status('TEST_NA', test.name,
               'Skipping: test not supported on this board/pool.',
               begin_time_str=begin_time_str).record_all(record)
        return None
    except (error.RPCException, proxy.JSONRPCException) as e:
        if retry_for:
            # Mark that we've attempted to retry the old job.
            self._retry_handler.set_attempted(job_id=retry_for)

        if not ignore_errors:
            # Bare raise keeps the traceback of the original failure.
            raise
        logging.error('Failed to schedule test: %s, Reason: %s',
                      test.name, e)
        return None
    else:
        self._jobs.append(job)
        self._jobs_to_tests[job.id] = test
        if retry_for:
            # A retry job was just created, record it.
            self._retry_handler.add_retry(
                    old_job_id=retry_for, new_job_id=job.id)
            retry_count = (test.job_retries -
                           self._retry_handler.get_retry_max(job.id))
            logging.debug('Job %d created to retry job %d. '
                          'Have retried for %d time(s)',
                          job.id, retry_for, retry_count)
        self._remember_job_keyval(job)
        return job
Fang Denge3bc24b2014-03-17 15:19:46 -0700872
873
def schedule(self, record, add_experimental=True):
    #pylint: disable-msg=C0111
    """
    Schedule jobs using |self._afe|.

    frontend.Job objects representing each scheduled job will be put in
    |self._jobs|.

    Any exception raised while scheduling is logged and recorded as a
    FAIL status for the suite rather than propagated to the caller.

    @param record: A callable to use for logging.
                   prototype: record(base_job.status_log_entry)
    @param add_experimental: schedule experimental tests as well, or not.
    @returns: The number of tests that were scheduled.
    """
    names_scheduled = []
    discoverer = _DynamicSuiteDiscoverer(
            tests=self.tests,
            add_experimental=add_experimental)
    stable_count = len(discoverer.stable_tests)
    logging.debug('Discovered %d stable tests.', stable_count)
    unstable_count = len(discoverer.unstable_tests)
    logging.debug('Discovered %d unstable tests.', unstable_count)

    Status('INFO', 'Start %s' % self._tag).record_result(record)
    try:
        # Write job_keyvals into keyval file.
        if self._job_keyvals:
            utils.write_keyval(self._results_dir, self._job_keyvals)

        # Only tests for which a job was really created are counted.
        for test in discoverer.discover_tests():
            if self._schedule_test(record, test) is not None:
                names_scheduled.append(test.name)

        # Write the num of scheduled tests and name of them to keyval file.
        logging.debug('Scheduled %d tests, writing the total to keyval.',
                      len(names_scheduled))
        summary_keyvals = self._make_scheduled_tests_keyvals(
                names_scheduled)
        utils.write_keyval(self._results_dir, summary_keyvals)
    except Exception:  # pylint: disable=W0703
        logging.exception('Exception while scheduling suite')
        Status('FAIL', self._tag,
               'Exception while scheduling suite').record_result(record)

    # The retry handler can only be built once the initial jobs are known.
    if self._job_retry:
        self._retry_handler = RetryHandler(
                initial_jobs_to_tests=self._jobs_to_tests,
                max_retries=self._max_retries)
    return len(names_scheduled)
Aviv Keshete9170d92013-07-19 11:20:45 -0700923
Alex Miller3a69adc2012-12-19 13:38:31 -0800924
def _make_scheduled_tests_keyvals(self, scheduled_test_names):
    """Make a keyvals dict to write for scheduled test names.

    The dict records how many tests were scheduled and the repr() of the
    list of their names.

    @param scheduled_test_names: A list of scheduled test name strings.

    @returns: A keyvals dict.
    """
    keyvals = {}
    keyvals[constants.SCHEDULED_TEST_COUNT_KEY] = len(scheduled_test_names)
    keyvals[constants.SCHEDULED_TEST_NAMES_KEY] = repr(scheduled_test_names)
    return keyvals
936
937
def _should_report(self, result):
    """
    Tell whether a failure requires to be reported.

    A result that is going to be retried is never reported.  Otherwise a
    result is reported when bug filing is enabled, the test was executed,
    the test is not experimental (or experimental bugs are filed too),
    the result is not TEST_NA, and it is worse than a GOOD status.

    @param result: A result, encapsulating the status of the failed job.
    @return: A true value if we should report this failure.
    """
    if self._has_retry(result):
        return False

    prefix = constants.EXPERIMENTAL_PREFIX
    is_experimental = (prefix in result._test_name or
                       prefix in result._job_name)
    bug_allowed = (not is_experimental) or self._file_experimental_bugs

    return (self._file_bugs
            and result.test_executed
            and bug_allowed
            and not result.is_testna()
            and result.is_worse_than(
                    job_status.Status('GOOD', '', 'reason')))
beepsda5b7112013-05-30 11:34:14 -0700956
957
def _has_retry(self, result):
    """
    Tell whether this result gets to retry.

    The retry handler is only consulted when job retry is enabled for
    the suite.

    @param result: A result, encapsulating the status of the failed job.
    @return: A false value when job retry is disabled, otherwise whatever
             the retry handler reports for |result|.
    """
    if not self._job_retry:
        return self._job_retry
    return self._retry_handler.has_following_retry(result)
967
968
def wait(self, record, bug_template=None):
    """
    Polls for the job statuses, using |record| to print status when each
    completes.

    Any exception raised while waiting is logged and recorded as a FAIL
    status for the suite rather than propagated to the caller.

    @param record: callable that records job status.
             prototype:
               record(base_job.status_log_entry)
    @param bug_template: A template dictionary specifying the default bug
                         filing options for failures in this suite.
    """
    # reporting modules have dependency on external packages, e.g., httplib2
    # Such dependency can cause issue to any module tries to import suite.py
    # without building site-packages first. The import is therefore done
    # here, where it is needed, to avoid the requirement of building site
    # packages to use other functions in this module.
    from autotest_lib.server.cros.dynamic_suite import reporting

    template = {} if bug_template is None else bug_template
    # A null reporter stands in when bug filing is turned off.
    reporter_class = (reporting.Reporter if self._file_bugs
                      else reporting.NullReporter)
    bug_reporter = reporter_class()

    try:
        if not self._suite_job_id:
            logging.warning('Unknown suite_job_id, falling back to less '
                            'efficient results_generator.')
            results_generator = job_status.wait_for_results(
                    self._afe, self._tko, self._jobs)
        else:
            results_generator = job_status.wait_for_child_results(
                    self._afe, self._tko, self._suite_job_id)

        for result in results_generator:
            self._record_result(
                    result=result,
                    record=record,
                    results_generator=results_generator,
                    bug_reporter=bug_reporter,
                    bug_template=template)
    except Exception:  # pylint: disable=W0703
        logging.exception('Exception waiting for results')
        Status('FAIL', self._tag,
               'Exception waiting for results').record_result(record)
1017
1018
def _record_result(self, result, record, results_generator, bug_reporter,
                   bug_template):
    """
    Record a single test job result.

    Besides recording the result, this schedules a retry job when the
    result is entitled to one, and reports the failure (as a filed bug or
    as an email) when the result should be reported.

    @param result: Status instance for job.
    @param record: callable that records job status.
             prototype:
               record(base_job.status_log_entry)
    @param results_generator: Results generator for sending job retries.
    @param bug_reporter: Reporter instance for reporting bugs.
    @param bug_template: A template dictionary specifying the default bug
                         filing options for failures in this suite.
    """
    result.record_all(record)
    self._remember_job_keyval(result)

    if self._has_retry(result):
        retried_test = self._jobs_to_tests[result.id]
        retry_job = self._schedule_test(
                record=record, test=retried_test,
                retry_for=result.id, ignore_errors=True)
        # Let the generator know about the retry so it gets polled too.
        if retry_job:
            results_generator.send([retry_job])

    # TODO (fdeng): If the suite times out before a retry could
    # finish, we would lose the chance to file a bug for the
    # original job.
    if not self._should_report(result):
        return

    if self._should_file_bugs:
        self._file_bug(result, bug_reporter, bug_template)
        return

    # reporting modules have dependency on external packages, e.g.,
    # httplib2. Such dependency can cause issue to any module tries to
    # import suite.py without building site-packages first. The import is
    # therefore done here, where it is needed, to avoid the requirement
    # of building site packages to use other functions in this module.
    from autotest_lib.server.cros.dynamic_suite import reporting

    test_bug = self._get_test_bug(result)
    merged_template = self._get_bug_template(result, bug_template)
    reporting.send_email(test_bug, merged_template)
Allen Li26b340d2016-12-29 15:23:01 -08001062
1063
def _get_bug_template(self, result, bug_template):
    """Get BugTemplate for test job.

    The suite-wide template is merged with the bug template of the test's
    control file.  When the control file defines no bug template the
    suite-wide one is used unchanged, and when the two cannot be merged
    an empty template is used.

    @param result: Status instance for job.
    @param bug_template: A template dictionary specifying the default bug
                         filing options for failures in this suite.
    @returns: BugTemplate instance
    """
    # reporting modules have dependency on external packages, e.g., httplib2
    # Such dependency can cause issue to any module tries to import suite.py
    # without building site-packages first. The import is therefore done
    # here, where it is needed, to avoid the requirement of building site
    # packages to use other functions in this module.
    from autotest_lib.server.cros.dynamic_suite import reporting_utils

    suite_template = reporting_utils.BugTemplate(bug_template)
    # Try to merge with bug template in test control file.
    try:
        control = self._jobs_to_tests[result.id]
        merged = suite_template.finalize_bug_template(control.bug_template)
    except AttributeError:
        # Test control file does not have bug template defined.
        return suite_template.bug_template
    except reporting_utils.InvalidBugTemplateException as e:
        logging.error('Merging bug templates failed with '
                      'error: %s An empty bug template will '
                      'be used.', e)
        return {}
    else:
        return merged
1094
1095
def _get_test_bug(self, result):
    """Get TestBug for the given result.

    The detailed test views of the job are fetched from TKO to find the
    Chrome version to put in the bug.

    @param result: Status instance for a test job.
    @returns: TestBug instance.
    """
    # reporting modules have dependency on external packages, e.g., httplib2
    # Such dependency can cause issue to any module tries to import suite.py
    # without building site-packages first. The import is therefore done
    # here, where it is needed, to avoid the requirement of building site
    # packages to use other functions in this module.
    from autotest_lib.server.cros.dynamic_suite import reporting

    views = self._tko.run('get_detailed_test_views', afe_job_id=result.id)
    build = self._cros_build
    chrome_version = site_utils.get_chrome_version(views)
    return reporting.TestBug(build, chrome_version, self._tag, result)
1116
1117
@property
def _should_file_bugs(self):
    """Tell whether bugs should be filed for this suite.

    @returns: bool
    """
    # A bug is filed when the suite is one of the _FILE_BUG_SUITES,
    # otherwise an email is sent to the owner and cc.
    suite_tag = self._tag
    return suite_tag in _FILE_BUG_SUITES
1127
1128
def _file_bug(self, result, bug_reporter, bug_template):
    """File a bug for a test job result.

    When the reporter returns a bug id, the bug is also written to the
    keyval file in the results directory.

    @param result: Status instance for job.
    @param bug_reporter: Reporter instance for reporting bugs.
    @param bug_template: A template dictionary specifying the default bug
                         filing options for failures in this suite.
    """
    test_bug = self._get_test_bug(result)
    merged_template = self._get_bug_template(result, bug_template)
    bug_id, bug_count = bug_reporter.report(test_bug, merged_template)
    if bug_id is None:
        return

    # We use keyvals to communicate bugs filed with run_suite.
    bug_keyvals = tools.create_bug_keyvals(
            result.id, result.test_name, (bug_id, bug_count))
    try:
        utils.write_keyval(self._results_dir, bug_keyvals)
    except ValueError:
        logging.error('Unable to log bug keyval for:%s',
                      result.test_name)
1152
1153
def abort(self):
    """
    Abort all scheduled test jobs.

    Nothing is sent to the AFE when no job has been scheduled.
    """
    jobs = self._jobs
    if not jobs:
        return
    ids_to_abort = []
    for job in jobs:
        ids_to_abort.append(job.id)
    self._afe.run('abort_host_queue_entries', job__id__in=ids_to_abort)
Chris Masone44e4d6c2012-08-15 14:25:53 -07001161
1162
def _remember_job_keyval(self, job):
    """
    Record provided job as a suite job keyval, for later referencing.

    The keyval maps the md5 hex digest of the job's test name to
    '<job id>-<job owner>'.  Nothing is written when there is no results
    directory or when the job lacks an id, an owner or a test name.

    @param job: some representation of a job that has the attributes:
                id, test_name, and owner
    """
    if not (self._results_dir and job.id and job.owner and job.test_name):
        return
    id_and_owner = '%s-%s' % (job.id, job.owner)
    logging.debug('Adding job keyval for %s=%s',
                  job.test_name, id_and_owner)
    keyval_name = hashlib.md5(job.test_name).hexdigest()
    utils.write_keyval(self._results_dir, {keyval_name: id_and_owner})
1177
Dan Shid1521802013-05-24 13:08:37 -07001178
@staticmethod
def _find_all_tests(cf_getter, suite_name='', add_experimental=False,
                    forgiving_parser=True, run_prod_code=False):
    """
    Function to scan through all tests and find all tests.

    When this method is called with a file system ControlFileGetter, or
    enable_controls_in_batch is set as false, this function will look at
    control files returned by cf_getter.get_control_file_list() for tests.

    If cf_getter is a File system ControlFileGetter, it performs a full
    parse of the root directory associated with the getter. This is the
    case when it's invoked from suite_preprocessor.

    If cf_getter is a devserver getter it looks up the suite_name in a
    suite to control file map generated at build time, and parses the
    relevant control files alone. This lookup happens on the devserver,
    so as far as this method is concerned, both cases are equivalent. If
    enable_controls_in_batch is switched on, this function will call
    cf_getter.get_suite_info() to get a dict of control files and contents
    in batch.

    Files under a deps/ or profilers/ directory are never parsed.  A
    control file that fails to parse for any reason other than a
    ControlVariableException is logged and skipped.

    @param cf_getter: a control_file_getter.ControlFileGetter used to list
           and fetch the content of control files
    @param suite_name: If specified, this method will attempt to restrain
                       the search space to just this suite's control files.
    @param add_experimental: add tests with experimental attribute set.
    @param forgiving_parser: If False, will raise ControlVariableExceptions
                             if any are encountered when parsing control
                             files. Note that this can raise an exception
                             for syntax errors in unrelated files, because
                             we parse them before applying the predicate.
    @param run_prod_code: If true, the suite will run the test code that
                          lives in prod aka the test code currently on the
                          lab servers by disabling SSP for the discovered
                          tests.

    @raises ControlVariableException: If forgiving_parser is False and there
                                      is a syntax error in a control file.

    @returns a dictionary mapping each control file path to the
             ControlData object parsed from it, based on given
             parameters.
    """
    logging.debug('Getting control file list for suite: %s', suite_name)
    tests = {}
    # Batch fetching is only used with getters that have a _dev_server
    # attribute.
    use_batch = (ENABLE_CONTROLS_IN_BATCH and hasattr(
            cf_getter, '_dev_server'))
    if use_batch:
        suite_info = cf_getter.get_suite_info(suite_name=suite_name)
        files = suite_info.keys()
    else:
        files = cf_getter.get_control_file_list(suite_name=suite_name)

    logging.debug('Parsing control files ...')
    matcher = re.compile(r'[^/]+/(deps|profilers)/.+')
    for path in files:
        if matcher.match(path):
            continue
        if use_batch:
            text = suite_info[path]
        else:
            text = cf_getter.get_control_file_contents(path)
        try:
            found_test = control_data.parse_control_string(
                    text, raise_warnings=True, path=path)
            if not add_experimental and found_test.experimental:
                continue
            found_test.text = text
            if run_prod_code:
                found_test.require_ssp = False
            tests[path] = found_test
        except control_data.ControlVariableException as e:
            if not forgiving_parser:
                msg = "Failed parsing %s\n%s" % (path, e)
                raise control_data.ControlVariableException(msg)
            logging.warning("Skipping %s\n%s", path, e)
        except Exception as e:
            # Best effort: one bad control file must not hide the others.
            logging.error("Bad %s\n%s", path, e)
    return tests
1257
1258
@classmethod
def find_and_parse_tests(cls, cf_getter, predicate, suite_name='',
                         add_experimental=False, forgiving_parser=True,
                         run_prod_code=False):
    """
    Function to scan through all tests and find eligible tests.

    Search through all tests based on given cf_getter, suite_name,
    add_experimental and forgiving_parser, return the tests that match
    given predicate.

    @param cf_getter: a control_file_getter.ControlFileGetter used to list
           and fetch the content of control files
    @param predicate: a function that should return True when run over a
           ControlData representation of a control file that should be in
           this Suite.
    @param suite_name: If specified, this method will attempt to restrain
                       the search space to just this suite's control files.
    @param add_experimental: add tests with experimental attribute set.
    @param forgiving_parser: If False, will raise ControlVariableExceptions
                             if any are encountered when parsing control
                             files. Note that this can raise an exception
                             for syntax errors in unrelated files, because
                             we parse them before applying the predicate.
    @param run_prod_code: If true, the suite will run the test code that
                          lives in prod aka the test code currently on the
                          lab servers by disabling SSP for the discovered
                          tests.

    @raises ControlVariableException: If forgiving_parser is False and there
                                      is a syntax error in a control file.

    @return list of ControlData objects that should be run, with control
            file text added in |text| attribute. Results are sorted based
            on the TIME setting in control file, slowest test comes first.
    """
    all_tests = cls._find_all_tests(cf_getter, suite_name, add_experimental,
                                    forgiving_parser,
                                    run_prod_code=run_prod_code)
    logging.debug('Parsed %s control files.', len(all_tests))

    def _time_index(test):
        """Return the sort index of the test's TIME setting."""
        return control_data.ControlData.get_test_time_index(test.time)

    eligible = (test for test in all_tests.itervalues() if predicate(test))
    return sorted(eligible, key=_time_index, reverse=True)
Dan Shi5783f8a2014-12-22 14:34:45 -08001304
1305
@classmethod
def find_possible_tests(cls, cf_getter, predicate, suite_name='', count=10):
    """
    Function to scan through all tests and find possible tests.

    Search through all tests based on given cf_getter and suite_name,
    with experimental tests included and a forgiving parser. Use the
    given predicate to calculate the similarity and return the |count|
    best matches.

    @param cf_getter: a control_file_getter.ControlFileGetter used to list
           and fetch the content of control files
    @param predicate: a function that should return a tuple of (name, ratio)
           when run over a ControlData representation of a control file that
           should be in this Suite. `name` is the key to be compared, e.g.,
           a suite name or test name. `ratio` is a value between [0,1]
           indicating the similarity of `name` and the value to be compared.
    @param suite_name: If specified, this method will attempt to restrain
                       the search space to just this suite's control files.
    @param count: Number of suggestions to return, default to 10.

    @return list of top names that similar to the given test, sorted by
            match ratio.
    """
    all_tests = cls._find_all_tests(cf_getter, suite_name,
                                    add_experimental=True,
                                    forgiving_parser=True)
    logging.debug('Parsed %s control files.', len(all_tests))

    ratio_by_name = {}
    for test in all_tests.itervalues():
        matches = predicate(test)
        # Some predicates may return a list of tuples, e.g.,
        # name_in_tag_similarity_predicate. Convert all returns to a list.
        if not isinstance(matches, list):
            matches = [matches]
        # A name seen more than once keeps the ratio seen last.
        for name, ratio in matches:
            ratio_by_name[name] = ratio

    ranked = sorted(ratio_by_name.items(), key=operator.itemgetter(1),
                    reverse=True)
    return [name for name, _ in ranked[:count]]
Allen Li9fcd4b42016-12-12 16:15:14 -08001345
1346
class _DynamicSuiteDiscoverer(object):
    """Test discoverer for dynamic suite tests.

    Holds a collection of tests and splits it into stable and experimental
    tests according to each test's `experimental` attribute.
    """


    def __init__(self, tests, add_experimental=True):
        """Initialize instance.

        @param tests: iterable of tests (ControlData objects)
        @param add_experimental: schedule experimental tests as well, or not.
        """
        # Materialize once so that one-shot iterables can be scanned by
        # both stable_tests and unstable_tests.
        self._tests = list(tests)
        self._add_experimental = add_experimental


    def discover_tests(self):
        """Return a list of tests to be scheduled for this suite.

        Stable tests come first. When add_experimental was set,
        experimental tests follow; each of them has
        constants.EXPERIMENTAL_PREFIX prepended to its name (in place, on
        the test object itself) unless the name already carries the prefix.

        @returns: list of tests (ControlData objects)
        """
        # stable_tests builds a fresh list on every access, so appending
        # to it does not modify self._tests.
        tests = self.stable_tests
        if self._add_experimental:
            for test in self.unstable_tests:
                if not test.name.startswith(constants.EXPERIMENTAL_PREFIX):
                    test.name = constants.EXPERIMENTAL_PREFIX + test.name
                tests.append(test)
        return tests


    @property
    def stable_tests(self):
        """Non-experimental tests.

        @returns: new list of the tests whose `experimental` attribute is
                  falsy, in their original order.
        """
        # A list comprehension rather than filter(): filter() returns a
        # lazy iterator on Python 3, which breaks the documented list
        # contract and the append() in discover_tests.
        return [t for t in self._tests if not t.experimental]


    @property
    def unstable_tests(self):
        """Experimental tests.

        @returns: new list of the tests whose `experimental` attribute is
                  truthy, in their original order.
        """
        return [t for t in self._tests if t.experimental]
1391
1392
def _is_nonexistent_board_error(e):
    """Tell whether a validation error stems from a nonexistent board label.

    As of this writing, the case of interest has these properties:

    1) e.problem_keys is a dictionary
    2) 'meta_hosts' is the one and only key in that dictionary
    3) e.problem_keys['meta_hosts'] matches this pattern:
       "Label "board:.*" not found"

    Only 1) and 2) are verified here, on the theory that they are
    relatively immutable. Condition 3) is deliberately not checked:
    matching the message text seems likely to be a maintenance burden,
    and in the cases where skipping it gives the wrong answer, being
    right shouldn't matter enough (we _hope_).

    @param e: proxy.ValidationError instance
    @returns: boolean
    """
    problem_keys = e.problem_keys
    if not isinstance(problem_keys, dict):
        return False
    if len(problem_keys) != 1:
        return False
    return 'meta_hosts' in problem_keys
1416 and 'meta_hosts' in e.problem_keys)