blob: 5c681c49319e7cff81252ce452d21d46db055295 [file] [log] [blame]
Chris Masone44e4d6c2012-08-15 14:25:53 -07001# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
Fang Deng443f1952015-01-02 14:51:49 -08005import datetime
6import difflib
7import hashlib
8import logging
9import operator
10import os
11import re
Fang Deng443f1952015-01-02 14:51:49 -080012import sys
Chris Masone44e4d6c2012-08-15 14:25:53 -070013
14import common
15
J. Richard Barnetteb592fbc2014-04-02 10:27:33 -070016from autotest_lib.frontend.afe.json_rpc import proxy
Alex Miller3a69adc2012-12-19 13:38:31 -080017from autotest_lib.client.common_lib import control_data
Fang Denge3bc24b2014-03-17 15:19:46 -070018from autotest_lib.client.common_lib import enum
Dan Shidfea3682014-08-10 23:38:40 -070019from autotest_lib.client.common_lib import error
Simran Basi5ace6f22016-01-06 17:30:44 -080020from autotest_lib.client.common_lib import global_config
Alex Miller7d658cf2013-09-04 16:00:35 -070021from autotest_lib.client.common_lib import priorities
Dan Shidfea3682014-08-10 23:38:40 -070022from autotest_lib.client.common_lib import site_utils
23from autotest_lib.client.common_lib import time_utils
24from autotest_lib.client.common_lib import utils
Fang Denge3bc24b2014-03-17 15:19:46 -070025from autotest_lib.frontend.afe.json_rpc import proxy
Dan Shi36cfd832014-10-10 13:38:51 -070026from autotest_lib.server.cros import provision
Chris Masone44e4d6c2012-08-15 14:25:53 -070027from autotest_lib.server.cros.dynamic_suite import constants
28from autotest_lib.server.cros.dynamic_suite import control_file_getter
29from autotest_lib.server.cros.dynamic_suite import frontend_wrappers
Alex Miller3a69adc2012-12-19 13:38:31 -080030from autotest_lib.server.cros.dynamic_suite import job_status
J. Richard Barnettee7b98bb2013-08-21 16:34:16 -070031from autotest_lib.server.cros.dynamic_suite import tools
32from autotest_lib.server.cros.dynamic_suite.job_status import Status
Chris Masone44e4d6c2012-08-15 14:25:53 -070033
Shuqian Zhaoab468812015-04-08 14:40:38 -070034try:
35 from chromite.lib import boolparse_lib
36 from chromite.lib import cros_logging as logging
37except ImportError:
38 print 'Unable to import chromite.'
39 print 'This script must be either:'
40 print ' - Be run in the chroot.'
41 print ' - (not yet supported) be run after running '
42 print ' ../utils/build_externals.py'
Fang Denge3bc24b2014-03-17 15:19:46 -070043
# Suite names kept in _FILE_BUG_SUITES; the constant is not referenced in this
# part of the file -- presumably these are the suites for which bugs get
# filed (TODO confirm against the callers).
_FILE_BUG_SUITES = [
    'au',
    'bvt',
    'bvt-cq',
    'bvt-inline',
    'paygen_au_beta',
    'paygen_au_canary',
    'paygen_au_dev',
    'paygen_au_stable',
    'sanity',
    'push_to_prod',
]

# Value of the SCHEDULER/drone_installation_directory config entry; used as the
# root directory for control files when a suite runs prod test code.
_AUTOTEST_DIR = global_config.global_config.get_config_value(
    'SCHEDULER',
    'drone_installation_directory')

# Boolean CROS/enable_getting_controls_in_batch config entry; False when the
# entry is missing.
ENABLE_CONTROLS_IN_BATCH = global_config.global_config.get_config_value(
    'CROS',
    'enable_getting_controls_in_batch',
    type=bool,
    default=False)
Shuqian Zhaoe33ba4a2015-09-11 18:51:43 -070051
class RetryHandler(object):
    """Bookkeeping for job retries within a single suite.

    @var _retry_map: Retry history, keyed by afe job id. Each value is a
            dictionary of the form
                {'state': RetryHandler.States, 'retry_max': int}
            - state: where the job stands with respect to retrying.
                NOT_ATTEMPTED:
                    Nothing has been done about the job yet.
                ATTEMPTED:
                    We tried to schedule a retry job. Scheduling may or
                    may not have succeeded (it might, for instance, have
                    hit an rpc error). A failure to *schedule* a retry is
                    not the same thing as a retry job *failing*. Each job
                    gets exactly one scheduling attempt: if a test has
                    JOB_RETRIES=5, its second retry fails, and creating
                    the third retry hits an rpc error, every following
                    retry is given up.
                RETRIED:
                    A retry job was scheduled successfully.
            - retry_max: how many more times the job may be retried,
                already accounting for the retries that have happened.
    @var _retry_level: A retry may only be triggered by a result that is
            worse than this level.
    @var _max_retries: Remaining suite-wide retry budget. No matter how
            often individual tests were retried, the total number of
            retries in the suite cannot exceed it.
    """

    States = enum.Enum('NOT_ATTEMPTED', 'ATTEMPTED', 'RETRIED',
                       start_value=1, step=1)

    def __init__(self, initial_jobs_to_tests, retry_level='WARN',
                 max_retries=None):
        """Set up the retry map from the jobs the suite scheduled itself.

        Only tests whose job_retries is positive get a retry-map entry.

        @param initial_jobs_to_tests: Dictionary mapping a job id to a
                ControlData object; should hold the jobs originally
                scheduled by the suite.
        @param retry_level: A retry may only be triggered by a result that
                is worse than this level.
        @param max_retries: Integer, maximum total number of retries
                allowed for the suite. None (the default) means no limit.
        """
        self._retry_map = {}
        self._retry_level = retry_level
        if max_retries is None:
            # "No limit" is modelled as the largest native int.
            self._max_retries = sys.maxint
        else:
            self._max_retries = max_retries
        for afe_job_id, control_data in initial_jobs_to_tests.items():
            if control_data.job_retries > 0:
                self.add_job(new_job_id=afe_job_id,
                             retry_max=control_data.job_retries)


    def add_job(self, new_job_id, retry_max):
        """Register a newly created job in the retry map.

        @param new_job_id: The afe_job_id of the newly created job.
        @param retry_max: How many times the test may be retried if this
                job fails.

        @raises ValueError if new_job_id is already in the retry map.

        """
        if new_job_id in self._retry_map:
            raise ValueError('add_job called when job is already in retry map.')

        record = {'state': self.States.NOT_ATTEMPTED,
                  'retry_max': retry_max}
        self._retry_map[new_job_id] = record


    def suite_max_reached(self):
        """Return True once the suite-wide retry budget is used up."""
        return self._max_retries <= 0


    def should_retry(self, result):
        """Decide whether the job behind |result| should be retried.

        The job is retried only when all of the following hold:
          a) The suite-wide retry budget is not exhausted.
          b) The test was actually executed; a job aborted before it ever
             reached 'Running' is not retried.
          c) The result is worse than |self._retry_level| (default 'WARN').
          d) The job has an entry in the retry map, i.e. the test asked
             for retries.
          e) No retry attempt was made yet (state == NOT_ATTEMPTED). A
             test with JOB_RETRIES=5 whose second retry hit an rpc error
             while being scheduled gets no further retries.
          f) The job still has retries left (retry_max > 0).

        @param result: A result, encapsulating the status of the job.

        @returns: A truthy value if the job should be retried, a falsy
                  value otherwise.

        """
        if self.suite_max_reached():
            return False

        bad_enough = (
                result.test_executed
                and result.is_worse_than(
                        job_status.Status(self._retry_level, '', 'reason')))
        if not bad_enough:
            # Hand back the falsy value itself, exactly as a chained
            # `and` expression would.
            return bad_enough

        record = self._retry_map.get(result.id)
        if record is None:
            return False
        return (record['state'] == self.States.NOT_ATTEMPTED
                and record['retry_max'] > 0)


    def add_retry(self, old_job_id, new_job_id):
        """Record that |new_job_id| is the retry of |old_job_id|.

        Marks the old job RETRIED, registers the new job with one retry
        fewer than the old job had, and consumes one unit of the
        suite-wide retry budget.

        @param old_job_id: The afe_job_id of the job that is retried.
        @param new_job_id: The afe_job_id of the retry job.

        @raises KeyError if old_job_id isn't in the retry map.
        @raises ValueError if the old job was already retried, or a retry
                was already attempted for it.

        """
        retried_record = self._retry_map[old_job_id]
        if retried_record['state'] != self.States.NOT_ATTEMPTED:
            raise ValueError(
                    'We have already retried or attempted to retry job %d' %
                    old_job_id)
        retried_record['state'] = self.States.RETRIED
        remaining = retried_record['retry_max'] - 1
        self.add_job(new_job_id=new_job_id, retry_max=remaining)
        self._max_retries -= 1


    def set_attempted(self, job_id):
        """Move the job from NOT_ATTEMPTED to ATTEMPTED.

        @param job_id: afe_job_id of a job.

        @raises KeyError if job_id isn't in the retry map.
        @raises ValueError if the current state is not NOT_ATTEMPTED.

        """
        record = self._retry_map[job_id]
        if record['state'] != self.States.NOT_ATTEMPTED:
            # Every job is retried, or has a retry attempted, at most
            # once; anything else is a bookkeeping error.
            raise ValueError('Unexpected state transition: %s -> %s' %
                             (self.States.get_string(record['state']),
                              self.States.get_string(self.States.ATTEMPTED)))
        record['state'] = self.States.ATTEMPTED


    def has_following_retry(self, result):
        """Tell whether a retry follows (or will follow) this result.

        For a given job id (result.id):
          - no retry map entry -> retries not requested, nothing follows
          - retry map entry present:
              - already retried -> a retry follows
              - not retried yet (decided by should_retry(result)):
                  - retry_max == 0 -> this was the last retry job
                  - retry_max > 0
                      - attempted, but scheduling the retry failed due
                        to an rpc error -> no more retries
                      - not attempted -> a retry follows if the test
                        failed.

        @param result: A result, encapsulating the status of the job.

        @returns: A truthy value if there will be a following retry, a
                  falsy value otherwise.

        """
        executed = result.test_executed
        if not executed:
            # Hand back the falsy value itself, exactly as a chained
            # `and` expression would.
            return executed

        record = self._retry_map.get(result.id)
        if record is None:
            return False
        if record['state'] == self.States.RETRIED:
            return True
        return self.should_retry(result)


    def get_retry_max(self, job_id):
        """Return how many more times the job may be retried.

        @param job_id: afe_job_id of a job.

        @returns: An int, the number of retries the job still has left.
        @raises KeyError if job_id isn't in the retry map.

        """
        record = self._retry_map[job_id]
        return record['retry_max']
250
251
Chris Masone44e4d6c2012-08-15 14:25:53 -0700252class Suite(object):
253 """
254 A suite of tests, defined by some predicate over control file variables.
255
256 Given a place to search for control files a predicate to match the desired
257 tests, can gather tests and fire off jobs to run them, and then wait for
258 results.
259
260 @var _predicate: a function that should return True when run over a
261 ControlData representation of a control file that should be in
262 this Suite.
263 @var _tag: a string with which to tag jobs run in this suite.
Dan Shi36cfd832014-10-10 13:38:51 -0700264 @var _builds: the builds on which we're running this suite.
Chris Masone44e4d6c2012-08-15 14:25:53 -0700265 @var _afe: an instance of AFE as defined in server/frontend.py.
266 @var _tko: an instance of TKO as defined in server/frontend.py.
267 @var _jobs: currently scheduled jobs, if any.
Fang Denge3bc24b2014-03-17 15:19:46 -0700268 @var _jobs_to_tests: a dictionary that maps job ids to tests represented
269 ControlData objects.
Chris Masone44e4d6c2012-08-15 14:25:53 -0700270 @var _cf_getter: a control_file_getter.ControlFileGetter
Fang Denge3bc24b2014-03-17 15:19:46 -0700271 @var _retry: a bool value indicating whether jobs should be retried on
272 failure.
273 @var _retry_handler: a RetryHandler object.
274
Chris Masone44e4d6c2012-08-15 14:25:53 -0700275 """
276
277
278 @staticmethod
Chris Sosaaccb5ce2012-08-30 17:29:15 -0700279 def create_ds_getter(build, devserver):
Chris Masone44e4d6c2012-08-15 14:25:53 -0700280 """
281 @param build: the build on which we're running this suite.
Chris Sosaaccb5ce2012-08-30 17:29:15 -0700282 @param devserver: the devserver which contains the build.
Chris Masone44e4d6c2012-08-15 14:25:53 -0700283 @return a FileSystemGetter instance that looks under |autotest_dir|.
284 """
Chris Sosaaccb5ce2012-08-30 17:29:15 -0700285 return control_file_getter.DevServerGetter(build, devserver)
Chris Masone44e4d6c2012-08-15 14:25:53 -0700286
287
288 @staticmethod
289 def create_fs_getter(autotest_dir):
290 """
291 @param autotest_dir: the place to find autotests.
292 @return a FileSystemGetter instance that looks under |autotest_dir|.
293 """
294 # currently hard-coded places to look for tests.
295 subpaths = ['server/site_tests', 'client/site_tests',
296 'server/tests', 'client/tests']
297 directories = [os.path.join(autotest_dir, p) for p in subpaths]
298 return control_file_getter.FileSystemGetter(directories)
299
300
301 @staticmethod
302 def parse_tag(tag):
Aviv Keshet18308922013-02-19 17:49:49 -0800303 """Splits a string on ',' optionally surrounded by whitespace.
304 @param tag: string to split.
305 """
Chris Masone44e4d6c2012-08-15 14:25:53 -0700306 return map(lambda x: x.strip(), tag.split(','))
307
308
309 @staticmethod
310 def name_in_tag_predicate(name):
311 """Returns predicate that takes a control file and looks for |name|.
312
313 Builds a predicate that takes in a parsed control file (a ControlData)
314 and returns True if the SUITE tag is present and contains |name|.
315
316 @param name: the suite name to base the predicate on.
317 @return a callable that takes a ControlData and looks for |name| in that
318 ControlData object's suite member.
319 """
Dan Shi5783f8a2014-12-22 14:34:45 -0800320 return lambda t: (hasattr(t, 'suite') and
321 name in Suite.parse_tag(t.suite))
322
323
324 @staticmethod
325 def name_in_tag_similarity_predicate(name):
326 """Returns predicate that takes a control file and gets the similarity
327 of the suites in the control file and the given name.
328
329 Builds a predicate that takes in a parsed control file (a ControlData)
330 and returns a list of tuples of (suite name, ratio), where suite name
331 is each suite listed in the control file, and ratio is the similarity
332 between each suite and the given name.
333
334 @param name: the suite name to base the predicate on.
335 @return a callable that takes a ControlData and returns a list of tuples
336 of (suite name, ratio), where suite name is each suite listed in
337 the control file, and ratio is the similarity between each suite
338 and the given name.
339 """
340 return lambda t: ((None, 0) if not hasattr(t, 'suite') else
341 [(suite,
342 difflib.SequenceMatcher(a=suite, b=name).ratio())
343 for suite in Suite.parse_tag(t.suite)])
Chris Masone44e4d6c2012-08-15 14:25:53 -0700344
345
346 @staticmethod
Aviv Keshet40222a42013-06-04 16:25:49 -0700347 def not_in_blacklist_predicate(blacklist):
348 """Returns predicate that takes a control file and looks for its
349 path to not be in given blacklist.
350
351 @param blacklist: A list of strings both paths on control_files that
352 should be blacklisted.
353
354 @return a callable that takes a ControlData and looks for it to be
355 absent from blacklist.
356 """
357 return lambda t: hasattr(t, 'path') and \
358 not any(b.endswith(t.path) for b in blacklist)
359
360
361 @staticmethod
362 def test_name_equals_predicate(test_name):
363 """Returns predicate that matched based on a test's name.
364
365 Builds a predicate that takes in a parsed control file (a ControlData)
366 and returns True if the test name is equal to |test_name|.
367
368 @param test_name: the test name to base the predicate on.
369 @return a callable that takes a ControlData and looks for |test_name|
370 in that ControlData's name.
371 """
372 return lambda t: hasattr(t, 'name') and test_name == t.name
373
374
375 @staticmethod
Aviv Kesheta6adc7a2013-08-30 11:13:38 -0700376 def test_name_matches_pattern_predicate(test_name_pattern):
377 """Returns predicate that matches based on a test's name pattern.
378
379 Builds a predicate that takes in a parsed control file (a ControlData)
380 and returns True if the test name matches the given regular expression.
381
382 @param test_name_pattern: regular expression (string) to match against
383 test names.
384 @return a callable that takes a ControlData and returns
385 True if the name fields matches the pattern.
386 """
387 return lambda t: hasattr(t, 'name') and re.match(test_name_pattern,
388 t.name)
389
390
391 @staticmethod
392 def test_file_matches_pattern_predicate(test_file_pattern):
393 """Returns predicate that matches based on a test's file name pattern.
394
395 Builds a predicate that takes in a parsed control file (a ControlData)
396 and returns True if the test's control file name matches the given
397 regular expression.
398
399 @param test_file_pattern: regular expression (string) to match against
400 control file names.
401 @return a callable that takes a ControlData and and returns
402 True if control file name matches the pattern.
403 """
404 return lambda t: hasattr(t, 'path') and re.match(test_file_pattern,
405 t.path)
406
407
408 @staticmethod
Shuqian Zhaoab468812015-04-08 14:40:38 -0700409 def matches_attribute_expression_predicate(test_attr_boolstr):
410 """Returns predicate that matches based on boolean expression of
411 attributes.
412
413 Builds a predicate that takes in a parsed control file (a ControlData)
414 ans returns True if the test attributes satisfy the given attribute
415 boolean expression.
416
417 @param test_attr_boolstr: boolean expression of the attributes to be
418 test, like 'system:all and interval:daily'.
419
420 @return a callable that takes a ControlData and returns True if the test
421 attributes satisfy the given boolean expression.
422 """
423 return lambda t: boolparse_lib.BoolstrResult(
424 test_attr_boolstr, t.attributes)
425
426 @staticmethod
Dan Shi5783f8a2014-12-22 14:34:45 -0800427 def test_name_similarity_predicate(test_name):
428 """Returns predicate that matched based on a test's name.
429
430 Builds a predicate that takes in a parsed control file (a ControlData)
431 and returns a tuple of (test name, ratio), where ratio is the similarity
432 between the test name and the given test_name.
433
434 @param test_name: the test name to base the predicate on.
435 @return a callable that takes a ControlData and returns a tuple of
436 (test name, ratio), where ratio is the similarity between the
437 test name and the given test_name.
438 """
439 return lambda t: ((None, 0) if not hasattr(t, 'name') else
440 (t.name,
441 difflib.SequenceMatcher(a=t.name, b=test_name).ratio()))
442
443
444 @staticmethod
445 def test_file_similarity_predicate(test_file_pattern):
446 """Returns predicate that gets the similarity based on a test's file
447 name pattern.
448
449 Builds a predicate that takes in a parsed control file (a ControlData)
450 and returns a tuple of (file path, ratio), where ratio is the
451 similarity between the test file name and the given test_file_pattern.
452
453 @param test_file_pattern: regular expression (string) to match against
454 control file names.
455 @return a callable that takes a ControlData and and returns a tuple of
456 (file path, ratio), where ratio is the similarity between the
457 test file name and the given test_file_pattern.
458 """
459 return lambda t: ((None, 0) if not hasattr(t, 'path') else
460 (t.path, difflib.SequenceMatcher(a=t.path,
461 b=test_file_pattern).ratio()))
462
463
464 @staticmethod
Chris Sosaaccb5ce2012-08-30 17:29:15 -0700465 def list_all_suites(build, devserver, cf_getter=None):
Chris Masone44e4d6c2012-08-15 14:25:53 -0700466 """
467 Parses all ControlData objects with a SUITE tag and extracts all
468 defined suite names.
469
Chris Sosaaccb5ce2012-08-30 17:29:15 -0700470 @param build: the build on which we're running this suite.
471 @param devserver: the devserver which contains the build.
Chris Masone44e4d6c2012-08-15 14:25:53 -0700472 @param cf_getter: control_file_getter.ControlFileGetter. Defaults to
473 using DevServerGetter.
474
475 @return list of suites
476 """
477 if cf_getter is None:
Chris Sosaaccb5ce2012-08-30 17:29:15 -0700478 cf_getter = Suite.create_ds_getter(build, devserver)
Chris Masone44e4d6c2012-08-15 14:25:53 -0700479
480 suites = set()
481 predicate = lambda t: hasattr(t, 'suite')
482 for test in Suite.find_and_parse_tests(cf_getter, predicate,
483 add_experimental=True):
484 suites.update(Suite.parse_tag(test.suite))
485 return list(suites)
486
487
488 @staticmethod
Dan Shi36cfd832014-10-10 13:38:51 -0700489 def get_test_source_build(builds, **dargs):
490 """Get the build of test code.
491
492 Get the test source build from arguments. If parameter
493 `test_source_build` is set and has a value, return its value. Otherwise
494 returns the ChromeOS build name if it exists. If ChromeOS build is not
495 specified either, raise SuiteArgumentException.
496
497 @param builds: the builds on which we're running this suite. It's a
498 dictionary of version_prefix:build.
499 @param **dargs: Any other Suite constructor parameters, as described
500 in Suite.__init__ docstring.
501
502 @return: The build contains the test code.
503 @raise: SuiteArgumentException if both test_source_build and ChromeOS
504 build are not specified.
505
506 """
507 if dargs.get('test_source_build', None):
508 return dargs['test_source_build']
509 test_source_build = builds.get(provision.CROS_VERSION_PREFIX, None)
510 if not test_source_build:
511 raise error.SuiteArgumentException(
512 'test_source_build must be specified if CrOS build is not '
513 'specified.')
514 return test_source_build
515
516
517 @staticmethod
518 def create_from_predicates(predicates, builds, board, devserver,
Simran Basi5ace6f22016-01-06 17:30:44 -0800519 cf_getter=None, name='ad_hoc_suite',
520 run_prod_code=False, **dargs):
Aviv Keshet69ebb6c2013-06-11 13:58:44 -0700521 """
522 Create a Suite using a given predicate test filters.
523
524 Uses supplied predicate(s) to instantiate a Suite. Looks for tests in
525 |autotest_dir| and will schedule them using |afe|. Pulls control files
526 from the default dev server. Results will be pulled from |tko| upon
527 completion.
528
529 @param predicates: A list of callables that accept ControlData
530 representations of control files. A test will be
Aviv Keshet938a6772013-07-25 14:05:45 -0700531 included in suite if all callables in this list
Aviv Keshet69ebb6c2013-06-11 13:58:44 -0700532 return True on the given control file.
Dan Shi36cfd832014-10-10 13:38:51 -0700533 @param builds: the builds on which we're running this suite. It's a
534 dictionary of version_prefix:build.
Alex Millera0913072013-06-12 10:01:51 -0700535 @param board: the board on which we're running this suite.
Aviv Keshet69ebb6c2013-06-11 13:58:44 -0700536 @param devserver: the devserver which contains the build.
537 @param cf_getter: control_file_getter.ControlFileGetter. Defaults to
538 using DevServerGetter.
539 @param name: name of suite. Defaults to 'ad_hoc_suite'
Simran Basi5ace6f22016-01-06 17:30:44 -0800540 @param run_prod_code: If true, the suite will run the tests that
541 lives in prod aka the test code currently on the
542 lab servers.
Aviv Keshet69ebb6c2013-06-11 13:58:44 -0700543 @param **dargs: Any other Suite constructor parameters, as described
544 in Suite.__init__ docstring.
545 @return a Suite instance.
546 """
547 if cf_getter is None:
Simran Basi5ace6f22016-01-06 17:30:44 -0800548 if run_prod_code:
549 cf_getter = Suite.create_fs_getter(_AUTOTEST_DIR)
550 else:
Dan Shi2121a332016-02-25 14:22:22 -0800551 build = Suite.get_test_source_build(builds, **dargs)
Simran Basi5ace6f22016-01-06 17:30:44 -0800552 cf_getter = Suite.create_ds_getter(build, devserver)
Aviv Keshet69ebb6c2013-06-11 13:58:44 -0700553
554 return Suite(predicates,
Simran Basi5ace6f22016-01-06 17:30:44 -0800555 name, builds, board, cf_getter, run_prod_code, **dargs)
Aviv Keshet69ebb6c2013-06-11 13:58:44 -0700556
557
558 @staticmethod
Dan Shi36cfd832014-10-10 13:38:51 -0700559 def create_from_name(name, builds, board, devserver, cf_getter=None,
Alex Millera0913072013-06-12 10:01:51 -0700560 **dargs):
Chris Masone44e4d6c2012-08-15 14:25:53 -0700561 """
562 Create a Suite using a predicate based on the SUITE control file var.
563
564 Makes a predicate based on |name| and uses it to instantiate a Suite
565 that looks for tests in |autotest_dir| and will schedule them using
566 |afe|. Pulls control files from the default dev server.
567 Results will be pulled from |tko| upon completion.
568
569 @param name: a value of the SUITE control file variable to search for.
Dan Shi36cfd832014-10-10 13:38:51 -0700570 @param builds: the builds on which we're running this suite. It's a
571 dictionary of version_prefix:build.
Alex Millera0913072013-06-12 10:01:51 -0700572 @param board: the board on which we're running this suite.
Chris Sosaaccb5ce2012-08-30 17:29:15 -0700573 @param devserver: the devserver which contains the build.
Aviv Keshet813d6782013-06-04 17:11:03 -0700574 @param cf_getter: control_file_getter.ControlFileGetter. Defaults to
575 using DevServerGetter.
576 @param **dargs: Any other Suite constructor parameters, as described
577 in Suite.__init__ docstring.
Chris Masone44e4d6c2012-08-15 14:25:53 -0700578 @return a Suite instance.
579 """
580 if cf_getter is None:
Dan Shi36cfd832014-10-10 13:38:51 -0700581 build = Suite.get_test_source_build(builds, **dargs)
Chris Sosaaccb5ce2012-08-30 17:29:15 -0700582 cf_getter = Suite.create_ds_getter(build, devserver)
583
Aviv Keshet40222a42013-06-04 16:25:49 -0700584 return Suite([Suite.name_in_tag_predicate(name)],
Dan Shi36cfd832014-10-10 13:38:51 -0700585 name, builds, board, cf_getter, **dargs)
Chris Masone44e4d6c2012-08-15 14:25:53 -0700586
587
Allen Li6fff5502016-12-09 18:04:26 -0800588 def __init__(
589 self,
590 predicates,
591 tag,
592 builds,
593 board,
594 cf_getter,
595 run_prod_code=False,
596 afe=None,
597 tko=None,
598 pool=None,
599 results_dir=None,
600 max_runtime_mins=24*60,
601 timeout_mins=24*60,
602 file_bugs=False,
603 file_experimental_bugs=False,
604 suite_job_id=None,
605 ignore_deps=False,
Allen Li493eefa2016-12-09 18:05:35 -0800606 extra_deps=None,
Allen Li6fff5502016-12-09 18:04:26 -0800607 priority=priorities.Priority.DEFAULT,
608 forgiving_parser=True,
609 wait_for_results=True,
610 job_retry=False,
611 max_retries=sys.maxint,
612 offload_failures_only=False,
613 test_source_build=None
614 ):
Chris Masone44e4d6c2012-08-15 14:25:53 -0700615 """
616 Constructor
617
Aviv Keshet40222a42013-06-04 16:25:49 -0700618 @param predicates: A list of callables that accept ControlData
619 representations of control files. A test will be
620 included in suite is all callables in this list
621 return True on the given control file.
Chris Masone44e4d6c2012-08-15 14:25:53 -0700622 @param tag: a string with which to tag jobs run in this suite.
Dan Shi36cfd832014-10-10 13:38:51 -0700623 @param builds: the builds on which we're running this suite.
Alex Millera0913072013-06-12 10:01:51 -0700624 @param board: the board on which we're running this suite.
Chris Masone44e4d6c2012-08-15 14:25:53 -0700625 @param cf_getter: a control_file_getter.ControlFileGetter
626 @param afe: an instance of AFE as defined in server/frontend.py.
627 @param tko: an instance of TKO as defined in server/frontend.py.
628 @param pool: Specify the pool of machines to use for scheduling
629 purposes.
Simran Basi5ace6f22016-01-06 17:30:44 -0800630 @param run_prod_code: If true, the suite will run the test code that
631 lives in prod aka the test code currently on the
632 lab servers.
Chris Masone44e4d6c2012-08-15 14:25:53 -0700633 @param results_dir: The directory where the job can write results to.
634 This must be set if you want job_id of sub-jobs
635 list in the job keyvals.
Aviv Keshet18308922013-02-19 17:49:49 -0800636 @param max_runtime_mins: Maximum suite runtime, in minutes.
Alex Miller028b0312013-09-07 15:25:45 -0700637 @param timeout: Maximum job lifetime, in hours.
Aviv Keshet18308922013-02-19 17:49:49 -0800638 @param suite_job_id: Job id that will act as parent id to all sub jobs.
639 Default: None
Aviv Keshetd7959f32013-05-17 15:58:43 -0700640 @param ignore_deps: True if jobs should ignore the DEPENDENCIES
641 attribute and skip applying of dependency labels.
642 (Default:False)
Alex Miller47a03672013-08-27 09:09:53 -0700643 @param extra_deps: A list of strings which are the extra DEPENDENCIES
644 to add to each test being scheduled.
Alex Miller7d658cf2013-09-04 16:00:35 -0700645 @param priority: Integer priority level. Higher is more important.
Dan Shi95122412013-11-12 16:20:33 -0800646 @param wait_for_results: Set to False to run the suite job without
647 waiting for test jobs to finish. Default is
648 True.
Fang Denge3bc24b2014-03-17 15:19:46 -0700649 @param job_retry: A bool value indicating whether jobs should be retired
650 on failure. If True, the field 'JOB_RETRIES' in
651 control files will be respected. If False, do not
652 retry.
Fang Deng443f1952015-01-02 14:51:49 -0800653 @param max_retries: Maximum retry limit at suite level.
654 Regardless how many times each individual test
655 has been retried, the total number of retries
656 happening in the suite can't exceed _max_retries.
657 Default to sys.maxint.
Simran Basi1e10e922015-04-16 15:09:56 -0700658 @param offload_failures_only: Only enable gs_offloading for failed
659 jobs.
Dan Shi36cfd832014-10-10 13:38:51 -0700660 @param test_source_build: Build that contains the server-side test code.
Simran Basi1e10e922015-04-16 15:09:56 -0700661
Chris Masone44e4d6c2012-08-15 14:25:53 -0700662 """
Allen Li493eefa2016-12-09 18:05:35 -0800663 if extra_deps is None:
664 extra_deps = []
665
Chris Masone44e4d6c2012-08-15 14:25:53 -0700666 self._tag = tag
Dan Shi36cfd832014-10-10 13:38:51 -0700667 self._builds = builds
Alex Millera0913072013-06-12 10:01:51 -0700668 self._board = board
Chris Masone44e4d6c2012-08-15 14:25:53 -0700669 self._cf_getter = cf_getter
670 self._results_dir = results_dir
671 self._afe = afe or frontend_wrappers.RetryingAFE(timeout_min=30,
672 delay_sec=10,
673 debug=False)
674 self._tko = tko or frontend_wrappers.RetryingTKO(timeout_min=30,
675 delay_sec=10,
676 debug=False)
677 self._pool = pool
678 self._jobs = []
Fang Denge3bc24b2014-03-17 15:19:46 -0700679 self._jobs_to_tests = {}
Allen Lid0833e32016-12-12 15:32:26 -0800680 self.tests = Suite.find_and_parse_tests(
Allen Li8a649092016-12-09 18:07:39 -0800681 self._cf_getter,
Allen Lid69b9f02016-12-09 18:15:59 -0800682 lambda control_data: all(f(control_data) for f in predicates),
Allen Li8a649092016-12-09 18:07:39 -0800683 self._tag,
684 add_experimental=True,
685 forgiving_parser=forgiving_parser,
686 run_prod_code=run_prod_code,
687 )
beeps89f1e062013-09-18 12:00:17 -0700688
Simran Basic68cda42012-11-19 17:03:18 -0800689 self._max_runtime_mins = max_runtime_mins
Simran Basi8705d672013-11-19 15:56:58 -0800690 self._timeout_mins = timeout_mins
Alex Millera3a4fe72013-01-22 09:57:47 -0800691 self._file_bugs = file_bugs
beepsda5b7112013-05-30 11:34:14 -0700692 self._file_experimental_bugs = file_experimental_bugs
Aviv Keshet18308922013-02-19 17:49:49 -0800693 self._suite_job_id = suite_job_id
Aviv Keshetd7959f32013-05-17 15:58:43 -0700694 self._ignore_deps = ignore_deps
Alex Miller47a03672013-08-27 09:09:53 -0700695 self._extra_deps = extra_deps
Alex Miller7d658cf2013-09-04 16:00:35 -0700696 self._priority = priority
Fang Denge3bc24b2014-03-17 15:19:46 -0700697 self._job_retry=job_retry
Fang Deng443f1952015-01-02 14:51:49 -0800698 self._max_retries = max_retries
Fang Denge3bc24b2014-03-17 15:19:46 -0700699 # RetryHandler to be initialized in schedule()
700 self._retry_handler = None
Dan Shi95122412013-11-12 16:20:33 -0800701 self.wait_for_results = wait_for_results
Simran Basi1e10e922015-04-16 15:09:56 -0700702 self._offload_failures_only = offload_failures_only
Dan Shi36cfd832014-10-10 13:38:51 -0700703 self._test_source_build = test_source_build
Alex Millera3a4fe72013-01-22 09:57:47 -0800704
Chris Masone44e4d6c2012-08-15 14:25:53 -0700705
@property
def stable_tests(self):
    """
    |self.tests|, filtered for non-experimental tests.

    @returns: A new list holding every entry of |self.tests| whose
              |experimental| attribute is false, in the original order.
    """
    # Build a real list instead of relying on filter(): callers take len()
    # of the result and append to it, which a lazy filter object (what
    # filter() returns on Python 3) does not support.
    return [t for t in self.tests if not t.experimental]
712
713
@property
def unstable_tests(self):
    """
    |self.tests|, filtered for experimental tests.

    @returns: A new list holding every entry of |self.tests| whose
              |experimental| attribute is true, in the original order.
    """
    # Build a real list instead of relying on filter(): callers take len()
    # of the result and iterate it more than once, which a lazy filter
    # object (what filter() returns on Python 3) does not support.
    return [t for t in self.tests if t.experimental]
720
721
@property
def _cros_build(self):
    """Return the CrOS build or the first build in the builds dict.

    @returns: The value stored under provision.CROS_VERSION_PREFIX in
              |self._builds| when that key is present, otherwise the
              first value the dict yields.
    @raises IndexError: If |self._builds| is empty.
    """
    # TODO(ayatane): Note that the builds dict isn't ordered.  I'm not
    # sure what the implications of this are, but it's probably not a
    # good thing.
    builds = self._builds
    if provision.CROS_VERSION_PREFIX in builds:
        return builds[provision.CROS_VERSION_PREFIX]
    # Only materialize the values when the fallback is actually needed;
    # list() keeps the indexing valid when values() returns a view.
    return list(builds.values())[0]
730
731
def _create_job(self, test, retry_for=None):
    """
    Create one AFE job for |test| through |self._afe|.create_job().

    @param test: ControlData object for a test to run.
    @param retry_for: If the to-be-created job is a retry for an
                      old job, the afe_job_id of the old job will
                      be passed in as |retry_for|, which will be
                      recorded in the new job's keyvals.
    @returns: A frontend.Job object with an added test_name member.
              test_name is used to preserve the higher level TEST_NAME
              name of the job.
    """
    # Collect the RPC arguments first so the call itself stays short.
    job_kwargs = {
        'control_file': test.text,
        # The job is named after the test source build when one is set,
        # otherwise after the CrOS build.
        'name': tools.create_job_name(
                self._test_source_build or self._cros_build,
                self._tag,
                test.name),
        'control_type': test.test_type.capitalize(),
        # One meta host (the board label) per machine the test needs.
        'meta_hosts': [self._board] * test.sync_count,
        'dependencies': self._create_job_deps(test),
        'keyvals': self._create_keyvals_for_test_job(test, retry_for),
        'max_runtime_mins': self._max_runtime_mins,
        'timeout_mins': self._timeout_mins,
        'parent_job_id': self._suite_job_id,
        'test_retry': test.retries,
        'priority': self._priority,
        'synch_count': test.sync_count,
        'require_ssp': test.require_ssp,
    }
    created_job = self._afe.create_job(**job_kwargs)

    created_job.test_name = test.name
    return created_job
765
766
Allen Lic68ca4a2016-12-12 17:28:36 -0800767 def _create_job_deps(self, test):
768 """Create job deps list for a test job.
769
770 @returns: A list of dependency strings.
771 """
772 if self._ignore_deps:
773 job_deps = []
774 else:
775 job_deps = list(test.dependencies)
776 job_deps.extend(self._extra_deps)
777 if self._pool:
778 job_deps.append(self._pool)
779 job_deps.append(self._board)
780 return job_deps
781
782
def _create_keyvals_for_test_job(self, test, retry_for=None):
    """Create keyvals dict for creating a test job.

    @param test: ControlData object for a test to run.
    @param retry_for: If the to-be-created job is a retry for an
                      old job, the afe_job_id of the old job will
                      be passed in as |retry_for|, which will be
                      recorded in the new job's keyvals.
    @returns: A keyvals dict for creating the test job.
    """
    keyvals = {
        constants.JOB_BUILD_KEY: self._cros_build,
        constants.JOB_SUITE_KEY: self._tag,
        constants.JOB_EXPERIMENTAL_KEY: test.experimental,
        constants.JOB_BUILDS_KEY: self._builds,
    }
    # test_source_build is saved to job_keyvals so the scheduler can
    # retrieve the build name from the database when compiling the
    # autoserv command line.  This avoids a database change to add a new
    # field in afe_jobs.
    #
    # Only add `test_source_build` to job keyvals if the build is different
    # from the CrOS build or the job uses more than one build, e.g., both
    # firmware and CrOS will be updated in the dut.
    # This is for backwards compatibility, so the updated Autotest code can
    # compile an autoserv command line to run in an SSP container using
    # previous builds.
    if (self._test_source_build and
        (self._cros_build != self._test_source_build or
         len(self._builds) > 1)):
        keyvals[constants.JOB_TEST_SOURCE_BUILD_KEY] = \
                self._test_source_build
        # Firmware builds are looked up directly by their provision prefix
        # rather than by scanning every entry of the builds dict.
        if provision.FW_RW_VERSION_PREFIX in self._builds:
            keyvals[constants.FWRW_BUILD] = (
                    self._builds[provision.FW_RW_VERSION_PREFIX])
        if provision.FW_RO_VERSION_PREFIX in self._builds:
            keyvals[constants.FWRO_BUILD] = (
                    self._builds[provision.FW_RO_VERSION_PREFIX])
    # Add suite job id to keyvals so tko parser can read it from keyval
    # file.
    if self._suite_job_id:
        keyvals[constants.PARENT_JOB_ID] = self._suite_job_id
    # We drop the old job's id in the new job's keyval file so that
    # later our tko parser can figure out the retry relationship and
    # invalidate the results of the old job in tko database.
    if retry_for:
        keyvals[constants.RETRY_ORIGINAL_JOB_ID] = retry_for
    if self._offload_failures_only:
        keyvals[constants.JOB_OFFLOAD_FAILURES_KEY] = True
    return keyvals
Chris Masone44e4d6c2012-08-15 14:25:53 -0700831
832
def _schedule_test(self, record, test, retry_for=None, ignore_errors=False):
    """Schedule a single test and return the job.

    Schedule a single test by creating a job, and then update relevant
    data structures that are used to keep track of all running jobs.

    Emits a TEST_NA status log entry if it failed to schedule the test due
    to NoEligibleHostException or a non-existent board label.

    Returns a frontend.Job object if the test is successfully scheduled.
    If scheduling failed due to NoEligibleHostException or a non-existent
    board label, returns None.  A proxy.ValidationError that is not a
    non-existent board label is always re-raised.  Other RPC errors
    return None if ignore_errors is True, otherwise they are raised as-is.

    @param record: A callable to use for logging.
                   prototype: record(base_job.status_log_entry)
    @param test: ControlData for a test to run.
    @param retry_for: If we are scheduling a test to retry an
                      old job, the afe_job_id of the old job
                      will be passed in as |retry_for|.
    @param ignore_errors: If True, when an rpc error occurs, ignore
                          the error and return None.
                          If False, rpc errors will be raised.

    @returns: A frontend.Job object or None
    """
    msg = 'Scheduling %s' % test.name
    if retry_for:
        msg = msg + ', to retry afe job %d' % retry_for
    logging.debug(msg)
    begin_time_str = datetime.datetime.now().strftime(time_utils.TIME_FMT)
    try:
        job = self._create_job(test, retry_for=retry_for)
    except (error.NoEligibleHostException, proxy.ValidationError) as e:
        if (isinstance(e, error.NoEligibleHostException)
            or (isinstance(e, proxy.ValidationError)
                and _is_nonexistent_board_error(e))):
            # Treat a dependency on a non-existent board label the same as
            # a dependency on a board that exists, but for which there's no
            # hardware.
            logging.debug('%s not applicable for this board/pool. '
                          'Emitting TEST_NA.', test.name)
            Status('TEST_NA', test.name,
                   'Skipping: test not supported on this board/pool.',
                   begin_time_str=begin_time_str).record_all(record)
            return None
        # Bare raise (rather than `raise e`) keeps the traceback of the
        # original failure.
        raise
    except (error.RPCException, proxy.JSONRPCException) as e:
        if retry_for:
            # Mark that we've attempted to retry the old job.
            self._retry_handler.set_attempted(job_id=retry_for)

        if not ignore_errors:
            # Bare raise keeps the traceback of the original failure.
            raise
        logging.error('Failed to schedule test: %s, Reason: %s',
                      test.name, e)
        return None
    else:
        self._jobs.append(job)
        self._jobs_to_tests[job.id] = test
        if retry_for:
            # A retry job was just created, record it.
            self._retry_handler.add_retry(
                    old_job_id=retry_for, new_job_id=job.id)
            retry_count = (test.job_retries -
                           self._retry_handler.get_retry_max(job.id))
            logging.debug('Job %d created to retry job %d. '
                          'Have retried for %d time(s)',
                          job.id, retry_for, retry_count)
        self._remember_provided_job_id(job)
        return job
Fang Denge3bc24b2014-03-17 15:19:46 -0700906
907
def schedule(self, record, add_experimental=True):
    #pylint: disable-msg=C0111
    """
    Schedule jobs using |self._afe|.

    frontend.Job objects representing each scheduled job will be put in
    |self._jobs|.

    Experimental tests that get scheduled have
    constants.EXPERIMENTAL_PREFIX prepended to their name (in place) if
    it is not already there.  Any exception raised while scheduling is
    logged and recorded as a FAIL status instead of being propagated.

    @param record: A callable to use for logging.
                   prototype: record(base_job.status_log_entry)
    @param add_experimental: schedule experimental tests as well, or not.
    @returns: The number of tests that were scheduled.
    """
    # Snapshot both views once into local lists: the properties re-filter
    # |self.tests| on every access, and the stable list is extended below,
    # so it must be a list this method owns.
    stable_tests = list(self.stable_tests)
    unstable_tests = list(self.unstable_tests)
    logging.debug('Discovered %d stable tests.', len(stable_tests))
    logging.debug('Discovered %d unstable tests.', len(unstable_tests))

    Status('INFO', 'Start %s' % self._tag).record_result(record)
    scheduled_test_names = []
    try:
        tests = stable_tests
        if add_experimental:
            for test in unstable_tests:
                if not test.name.startswith(constants.EXPERIMENTAL_PREFIX):
                    test.name = constants.EXPERIMENTAL_PREFIX + test.name
                tests.append(test)

        for test in tests:
            scheduled_job = self._schedule_test(record, test)
            if scheduled_job is not None:
                scheduled_test_names.append(test.name)

        # Write the num of scheduled tests and name of them to keyval file.
        logging.debug('Scheduled %d tests, writing the total to keyval.',
                      len(scheduled_test_names))
        utils.write_keyval(
                self._results_dir,
                self._make_scheduled_tests_keyvals(scheduled_test_names))
    except Exception:  # pylint: disable=W0703
        # Deliberate catch-all: a scheduling failure is reported through
        # the status log rather than aborting the suite job.
        logging.exception('Exception while scheduling suite')
        Status('FAIL', self._tag,
               'Exception while scheduling suite').record_result(record)

    if self._job_retry:
        self._retry_handler = RetryHandler(
                initial_jobs_to_tests=self._jobs_to_tests,
                max_retries=self._max_retries)
    return len(scheduled_test_names)
Aviv Keshete9170d92013-07-19 11:20:45 -0700956
Alex Miller3a69adc2012-12-19 13:38:31 -0800957
def _make_scheduled_tests_keyvals(self, scheduled_test_names):
    """Build the keyvals that describe which tests were scheduled.

    @param scheduled_test_names: A list of scheduled test name strings.

    @returns: A keyvals dict holding the number of names and the repr()
              of the name list.
    """
    keyvals = {}
    keyvals[constants.SCHEDULED_TEST_COUNT_KEY] = len(scheduled_test_names)
    keyvals[constants.SCHEDULED_TEST_NAMES_KEY] = repr(scheduled_test_names)
    return keyvals
969
970
def _should_report(self, result):
    """
    Decide whether a job result needs to be reported.

    A result that still has a retry following it is never reported.
    Otherwise it is reported only when bug filing is on, the test was
    executed, it is not TEST_NA, it is worse than GOOD, and it is either
    non-experimental or experimental bug filing is enabled.

    @param result: A result, encapsulating the status of the failed job.
    @return: A true value if we should report this failure.
    """
    if self._job_retry and self._retry_handler.has_following_retry(result):
        return False

    prefix = constants.EXPERIMENTAL_PREFIX
    is_experimental = (prefix in result._test_name or
                       prefix in result._job_name)
    good_status = lambda: job_status.Status('GOOD', '', 'reason')

    return (self._file_bugs and result.test_executed and
            (not is_experimental or self._file_experimental_bugs) and
            not result.is_testna() and
            result.is_worse_than(good_status()))
beepsda5b7112013-05-30 11:34:14 -0700989
990
def wait(self, record, bug_template=None):
    """
    Poll for job results and hand each one to |self._record_result|.

    Any exception raised while waiting is logged and recorded as a FAIL
    status for the suite instead of being propagated.

    @param record: callable that records job status.
             prototype:
               record(base_job.status_log_entry)
    @param bug_template: A template dictionary specifying the default bug
                         filing options for failures in this suite.
    """
    # The reporting modules depend on external packages, e.g. httplib2.
    # Importing them at module level would force every importer of this
    # module to build site-packages first, so the import stays local to
    # the functions that need it.
    from autotest_lib.server.cros.dynamic_suite import reporting

    if bug_template is None:
        bug_template = {}

    reporter_class = (reporting.Reporter if self._file_bugs
                      else reporting.NullReporter)
    bug_reporter = reporter_class()
    try:
        if not self._suite_job_id:
            logging.warning('Unknown suite_job_id, falling back to less '
                            'efficient results_generator.')
            results_generator = job_status.wait_for_results(
                    self._afe, self._tko, self._jobs)
        else:
            results_generator = job_status.wait_for_child_results(
                    self._afe, self._tko, self._suite_job_id)
        for job_result in results_generator:
            self._record_result(
                    result=job_result,
                    record=record,
                    results_generator=results_generator,
                    bug_reporter=bug_reporter,
                    bug_template=bug_template)

    except Exception:  # pylint: disable=W0703
        logging.exception('Exception waiting for results')
        Status('FAIL', self._tag,
               'Exception waiting for results').record_result(record)
1039
1040
def _record_result(self, result, record, results_generator, bug_reporter,
                   bug_template):
    """
    Record a single test job result, retrying and reporting it as needed.

    @param result: Status instance for job.
    @param record: callable that records job status.
             prototype:
               record(base_job.status_log_entry)
    @param results_generator: Results generator for sending job retries.
    @param bug_reporter: Reporter instance for reporting bugs.
    @param bug_template: A template dictionary specifying the default bug
                         filing options for failures in this suite.
    """
    # The reporting modules depend on external packages, e.g. httplib2.
    # Importing them at module level would force every importer of this
    # module to build site-packages first, so the imports stay local to
    # the functions that need them.
    from autotest_lib.server.cros.dynamic_suite import reporting
    from autotest_lib.server.cros.dynamic_suite import reporting_utils

    result.record_all(record)
    if job_status.is_for_infrastructure_fail(result):
        self._remember_provided_job_id(result)
    elif isinstance(result, Status):
        self._remember_test_status_job_id(result)

    if self._job_retry and self._retry_handler.should_retry(result):
        retry_job = self._schedule_test(
                record=record, test=self._jobs_to_tests[result.id],
                retry_for=result.id, ignore_errors=True)
        if retry_job:
            # Feed the retry back so the generator waits for it too.
            results_generator.send([retry_job])

    # TODO (fdeng): If the suite times out before a retry could
    # finish, we would lose the chance to file a bug for the
    # original job.
    if not self._should_report(result):
        return

    failure = self._get_test_bug(result)

    # Try to merge with bug template in test control file.
    suite_template = reporting_utils.BugTemplate(bug_template)
    try:
        control_file_template = self._jobs_to_tests[result.id].bug_template
        merged_template = suite_template.finalize_bug_template(
                control_file_template)
    except AttributeError:
        # Test control file does not have bug template defined.
        merged_template = suite_template.bug_template
    except reporting_utils.InvalidBugTemplateException as e:
        merged_template = {}
        logging.error('Merging bug templates failed with '
                      'error: %s An empty bug template will '
                      'be used.', e)

    # File a bug when the suite is one of the _FILE_BUG_SUITES,
    # otherwise send an email to the owner and cc.
    if self._tag not in _FILE_BUG_SUITES:
        reporting.send_email(failure, merged_template)
        return

    bug_id, bug_count = bug_reporter.report(failure, merged_template)
    if bug_id is None:
        return

    # We use keyvals to communicate bugs filed with run_suite.
    bug_keyvals = tools.create_bug_keyvals(
            result.id, result.test_name, (bug_id, bug_count))
    try:
        utils.write_keyval(self._results_dir, bug_keyvals)
    except ValueError:
        logging.error('Unable to log bug keyval for:%s',
                      result.test_name)
1118
1119
def _get_test_bug(self, result):
    """Build the TestBug that describes the given result.

    @param result: Status instance for a test job.
    @returns: TestBug instance.
    """
    # The reporting modules depend on external packages, e.g. httplib2.
    # Importing them at module level would force every importer of this
    # module to build site-packages first, so the import stays local to
    # the functions that need it.
    from autotest_lib.server.cros.dynamic_suite import reporting

    detailed_views = self._tko.run('get_detailed_test_views',
                                   afe_job_id=result.id)
    build = self._cros_build
    chrome_version = site_utils.get_chrome_version(detailed_views)
    return reporting.TestBug(build, chrome_version, self._tag, result)
1140
1141
def abort(self):
    """
    Abort all scheduled test jobs.

    Does nothing when no job has been scheduled.
    """
    if not self._jobs:
        return
    scheduled_ids = []
    for scheduled in self._jobs:
        scheduled_ids.append(scheduled.id)
    self._afe.run('abort_host_queue_entries', job__id__in=scheduled_ids)
Chris Masone44e4d6c2012-08-15 14:25:53 -07001149
1150
Allen Li4af568b2016-12-12 18:35:07 -08001151 # TODO(ayatane): This is identical to _remember_test_status_job_id. It
1152 # suggests that we can factor out a job-like interface that both jobs and
1153 # statuses support so we can merge the two methods to work on job-like
1154 # objects. This deduplication can probably be applied to other places.
Chris Masoned9f13c52012-08-29 10:37:08 -07001155 def _remember_provided_job_id(self, job):
1156 """
1157 Record provided job as a suite job keyval, for later referencing.
1158
1159 @param job: some representation of a job, including id, test_name
1160 and owner
1161 """
Allen Li3cc73cd2016-12-12 16:02:21 -08001162 if self._results_dir and job.id and job.owner and job.test_name:
Chris Masone44e4d6c2012-08-15 14:25:53 -07001163 job_id_owner = '%s-%s' % (job.id, job.owner)
Chris Masoned9f13c52012-08-29 10:37:08 -07001164 logging.debug('Adding job keyval for %s=%s',
Chris Sosaaccb5ce2012-08-30 17:29:15 -07001165 job.test_name, job_id_owner)
Chris Masone44e4d6c2012-08-15 14:25:53 -07001166 utils.write_keyval(
1167 self._results_dir,
1168 {hashlib.md5(job.test_name).hexdigest(): job_id_owner})
1169
Allen Li4af568b2016-12-12 18:35:07 -08001170 # TODO(ayatane): This is identical to _remember_provided_job_id. See that
1171 # method for details.
Dan Shid1521802013-05-24 13:08:37 -07001172 def _remember_test_status_job_id(self, status):
1173 """
1174 Record provided status as a test status keyval, for later referencing.
1175
1176 @param status: Test status, including properties such as id, test_name
1177 and owner.
1178 """
Allen Li3cc73cd2016-12-12 16:02:21 -08001179 if (self._results_dir
1180 and status.id and status.owner and status.test_name):
Dan Shid1521802013-05-24 13:08:37 -07001181 test_id_owner = '%s-%s' % (status.id, status.owner)
1182 logging.debug('Adding status keyval for %s=%s',
1183 status.test_name, test_id_owner)
1184 utils.write_keyval(
1185 self._results_dir,
1186 {hashlib.md5(status.test_name).hexdigest(): test_id_owner})
1187
1188
@staticmethod
def find_all_tests(cf_getter, suite_name='', add_experimental=False,
                   forgiving_parser=True, run_prod_code=False):
    """
    Function to scan through all tests and find all tests.

    When this method is called with a file system ControlFileGetter, or
    enable_controls_in_batch is set as false, this function will look at
    control files returned by cf_getter.get_control_file_list() for tests.

    If cf_getter is a File system ControlFileGetter, it performs a full
    parse of the root directory associated with the getter. This is the
    case when it's invoked from suite_preprocessor.

    If cf_getter is a devserver getter it looks up the suite_name in a
    suite to control file map generated at build time, and parses the
    relevant control files alone. This lookup happens on the devserver,
    so as far as this method is concerned, both cases are equivalent. If
    enable_controls_in_batch is switched on, this function will call
    cf_getter.get_suite_info() to get a dict of control files and contents
    in batch.

    Control files under a deps/ or profilers/ directory are skipped.

    @param cf_getter: a control_file_getter.ControlFileGetter used to list
           and fetch the content of control files
    @param suite_name: If specified, this method will attempt to restrain
                       the search space to just this suite's control files.
    @param add_experimental: add tests with experimental attribute set.
    @param forgiving_parser: If False, will raise ControlVariableExceptions
                             if any are encountered when parsing control
                             files. Note that this can raise an exception
                             for syntax errors in unrelated files, because
                             we parse them before applying the predicate.
    @param run_prod_code: If true, the suite will run the test code that
                          lives in prod aka the test code currently on the
                          lab servers by disabling SSP for the discovered
                          tests.

    @raises ControlVariableException: If forgiving_parser is False and there
                                      is a syntax error in a control file.

    @returns a dictionary mapping control file path to the ControlData
             object parsed from it, based on the given parameters.
    """
    logging.debug('Getting control file list for suite: %s', suite_name)
    tests = {}
    # Batch fetching is only attempted for getters that carry a
    # _dev_server attribute.
    use_batch = (ENABLE_CONTROLS_IN_BATCH and hasattr(
            cf_getter, '_dev_server'))
    if use_batch:
        suite_info = cf_getter.get_suite_info(suite_name=suite_name)
        files = suite_info.keys()
    else:
        files = cf_getter.get_control_file_list(suite_name=suite_name)

    logging.debug('Parsing control files ...')
    matcher = re.compile(r'[^/]+/(deps|profilers)/.+')
    for path in files:
        if matcher.match(path):
            continue
        if use_batch:
            text = suite_info[path]
        else:
            text = cf_getter.get_control_file_contents(path)
        try:
            found_test = control_data.parse_control_string(
                    text, raise_warnings=True, path=path)
            if not add_experimental and found_test.experimental:
                continue
            found_test.text = text
            if run_prod_code:
                found_test.require_ssp = False
            tests[path] = found_test
        except control_data.ControlVariableException as e:
            if not forgiving_parser:
                msg = "Failed parsing %s\n%s" % (path, e)
                raise control_data.ControlVariableException(msg)
            logging.warning("Skipping %s\n%s", path, e)
        except Exception as e:
            # Best effort: one unparseable control file must not prevent
            # the remaining files from being discovered.
            logging.error("Bad %s\n%s", path, e)
    return tests
1267
1268
@staticmethod
def find_and_parse_tests(cf_getter, predicate, suite_name='',
                         add_experimental=False, forgiving_parser=True,
                         run_prod_code=False):
    """
    Function to scan through all tests and find eligible tests.

    Search through all tests based on given cf_getter, suite_name,
    add_experimental and forgiving_parser, return the tests that match
    given predicate.

    @param cf_getter: a control_file_getter.ControlFileGetter used to list
           and fetch the content of control files
    @param predicate: a function that should return True when run over a
           ControlData representation of a control file that should be in
           this Suite.
    @param suite_name: If specified, this method will attempt to restrain
                       the search space to just this suite's control files.
    @param add_experimental: add tests with experimental attribute set.
    @param forgiving_parser: If False, will raise ControlVariableExceptions
                             if any are encountered when parsing control
                             files. Note that this can raise an exception
                             for syntax errors in unrelated files, because
                             we parse them before applying the predicate.
    @param run_prod_code: If true, the suite will run the test code that
                          lives in prod aka the test code currently on the
                          lab servers by disabling SSP for the discovered
                          tests.

    @raises ControlVariableException: If forgiving_parser is False and there
                                      is a syntax error in a control file.

    @return list of ControlData objects that should be run, with control
            file text added in |text| attribute. Results are sorted based
            on the TIME setting in control file, slowest test comes first.
    """
    tests = Suite.find_all_tests(cf_getter, suite_name, add_experimental,
                                 forgiving_parser,
                                 run_prod_code=run_prod_code)
    logging.debug('Parsed %s control files.', len(tests))
    # values() (not the Python-2-only itervalues()) works on every dict.
    tests = [test for test in tests.values() if predicate(test)]
    tests.sort(key=lambda t:
               control_data.ControlData.get_test_time_index(t.time),
               reverse=True)
    return tests
Dan Shi5783f8a2014-12-22 14:34:45 -08001314
1315
@staticmethod
def find_possible_tests(cf_getter, predicate, suite_name='', count=10):
    """
    Function to scan through all tests and find possible tests.

    Search through all tests (experimental ones included, with a
    forgiving parser) based on given cf_getter and suite_name. Use the
    given predicate to calculate the similarity and return the top
    |count| matches.

    @param cf_getter: a control_file_getter.ControlFileGetter used to list
           and fetch the content of control files
    @param predicate: a function that should return a tuple of (name, ratio)
           when run over a ControlData representation of a control file that
           should be in this Suite. `name` is the key to be compared, e.g.,
           a suite name or test name. `ratio` is a value between [0,1]
           indicating the similarity of `name` and the value to be compared.
    @param suite_name: If specified, this method will attempt to restrain
                       the search space to just this suite's control files.
    @param count: Number of suggestions to return, default to 10.

    @return list of top names that similar to the given test, sorted by
            match ratio.
    """
    tests = Suite.find_all_tests(cf_getter, suite_name,
                                 add_experimental=True,
                                 forgiving_parser=True)
    logging.debug('Parsed %s control files.', len(tests))
    similarities = {}
    # values() (not the Python-2-only itervalues()) works on every dict.
    for test in tests.values():
        ratios = predicate(test)
        # Some predicates may return a list of tuples, e.g.,
        # name_in_tag_similarity_predicate. Convert all returns to a list.
        if not isinstance(ratios, list):
            ratios = [ratios]
        # A name seen more than once keeps the ratio seen last.
        for name, ratio in ratios:
            similarities[name] = ratio
    ranked = sorted(similarities.items(), key=operator.itemgetter(1),
                    reverse=True)
    return [name for name, _ in ranked][:count]
Allen Li9fcd4b42016-12-12 16:15:14 -08001355
1356
1357def _is_nonexistent_board_error(e):
1358 """Return True if error is caused by nonexistent board label.
1359
1360 As of this writing, the particular case we want looks like this:
1361
1362 1) e.problem_keys is a dictionary
1363 2) e.problem_keys['meta_hosts'] exists as the only key
1364 in the dictionary.
1365 3) e.problem_keys['meta_hosts'] matches this pattern:
1366 "Label "board:.*" not found"
1367
1368 We check for conditions 1) and 2) on the
1369 theory that they're relatively immutable.
1370 We don't check condition 3) because it seems
1371 likely to be a maintenance burden, and for the
1372 times when we're wrong, being right shouldn't
1373 matter enough (we _hope_).
1374
1375 @param e: proxy.ValidationError instance
1376 @returns: boolean
1377 """
1378 return (isinstance(e.problem_keys, dict)
1379 and len(e.problem_keys) == 1
1380 and 'meta_hosts' in e.problem_keys)