blob: 3852644f0983276782bda3fdc335a7464ab81829 [file] [log] [blame]
Chris Masone8ac66712012-02-15 14:21:02 -08001# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
Chris Masone6fed6462011-10-20 16:36:43 -07002# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5import common
6import compiler, logging, os, random, re, time
Chris Masone2ef1d4e2011-12-20 11:06:53 -08007from autotest_lib.client.common_lib import control_data, global_config, error
8from autotest_lib.client.common_lib import utils
Chris Masone8ac66712012-02-15 14:21:02 -08009from autotest_lib.server.cros import control_file_getter, frontend_wrappers
Chris Masone6fed6462011-10-20 16:36:43 -070010from autotest_lib.server import frontend
11
12
# Prefix of the label that ties a host to a build: the reimaging code ensures
# a label named VERSION_PREFIX + <build> exists, and suite jobs are scheduled
# against meta-hosts carrying that label.
VERSION_PREFIX = 'cros-version-'
# Handle on the autotest global configuration; the CROS section is read below.
CONFIG = global_config.global_config
15
16
def inject_vars(vars, control_file_in):
    """
    Inject the contents of |vars| into |control_file_in|

    Each entry of |vars| becomes one line of the form  key='value'  that is
    placed ahead of the original control file text, so the control file can
    refer to the injected names as string variables.

    Values are formatted with %s and wrapped in single quotes without any
    escaping, so a value containing a single quote or a backslash will not
    round-trip cleanly.

    @param vars: a dict to shoehorn into the provided control file string.
    @param control_file_in: the contents of a control file to munge.
    @return the modified control file string.
    """
    # items() rather than iteritems(): works on every Python version, and
    # join() avoids re-copying the growing string for each variable.
    assignments = ["%s='%s'\n" % (key, value) for key, value in vars.items()]
    return ''.join(assignments) + control_file_in
29
30
def _image_url_pattern():
    """
    Fetch the image URL format string from the global config.

    @return the string value of |image_url_pattern| in the CROS section.
    """
    pattern = CONFIG.get_config_value('CROS', 'image_url_pattern', type=str)
    return pattern
33
34
def _package_url_pattern():
    """
    Fetch the package URL pattern from the global config.

    @return the string value of |package_url_pattern| in the CROS section.
    """
    pattern = CONFIG.get_config_value('CROS', 'package_url_pattern', type=str)
    return pattern
37
Chris Masone6fed6462011-10-20 16:36:43 -070038
class Reimager(object):
    """
    A class that can run jobs to reimage devices.

    @var _afe: a frontend.AFE instance used to talk to autotest.
    @var _tko: a frontend.TKO instance used to query the autotest results db.
    @var _cf_getter: a ControlFileGetter used to get the AU control file.
    """


    def __init__(self, autotest_dir, afe=None, tko=None):
        """
        Constructor

        @param autotest_dir: the place to find autotests.
        @param afe: an instance of AFE as defined in server/frontend.py.
                    If omitted, a RetryingAFE is created.
        @param tko: an instance of TKO as defined in server/frontend.py.
                    If omitted, a RetryingTKO is created.
        """
        self._afe = afe or frontend_wrappers.RetryingAFE(timeout_min=30,
                                                         delay_sec=10,
                                                         debug=False)
        self._tko = tko or frontend_wrappers.RetryingTKO(timeout_min=30,
                                                         delay_sec=10,
                                                         debug=False)
        self._cf_getter = control_file_getter.FileSystemGetter(
            [os.path.join(autotest_dir, 'server/site_tests')])


    def skip(self, g):
        """
        Report whether |g| asks for reimaging to be skipped.

        @param g: a dict of variables to inspect for a SKIP_IMAGE entry
                  (presumably a control file's globals -- confirm with
                  callers).
        @return the value of g['SKIP_IMAGE'] if present, False otherwise.
        """
        return g.get('SKIP_IMAGE', False)


    def attempt(self, build, board, record, num=None):
        """
        Synchronously attempt to reimage some machines.

        Fire off attempts to reimage |num| machines of type |board|, using
        the image called |build|.  Wait for completion, polling every
        10s, and log results with |record| upon completion.

        @param build: the build to install e.g.
                      x86-alex-release/R18-1655.0.0-a1-b1584.
        @param board: which kind of devices to reimage.
        @param record: callable that records job status.
                       prototype:
                         record(status, subdir, name, reason)
        @param num: how many devices to reimage.  When not given (or 0), the
                    CROS |sharding_factor| config value is used.
        @return True if all reimaging jobs succeed, false otherwise.
        """
        if not num:
            num = CONFIG.get_config_value('CROS', 'sharding_factor', type=int)
        logging.debug("scheduling reimaging across %d machines", num)
        wrapper_job_name = 'try new image'
        record('START', None, wrapper_job_name)
        self._ensure_version_label(VERSION_PREFIX + build)
        canary = self._schedule_reimage_job(build, num, board)
        logging.debug('Created re-imaging job: %d', canary.id)

        # Phase 1: wait for the job to leave the queue.
        while self._afe.get_jobs(id=canary.id, not_yet_run=True):
            time.sleep(10)
        logging.debug('Re-imaging job running.')
        # Phase 2: wait for the job to be reported as finished.
        while not self._afe.get_jobs(id=canary.id, finished=True):
            time.sleep(10)
        logging.debug('Re-imaging job finished.')
        canary.result = self._afe.poll_job_results(self._tko, canary, 0)

        if canary.result is True:
            self._report_results(canary, record)
            record('END GOOD', None, wrapper_job_name)
            return True

        if canary.result is None:
            record('FAIL', None, canary.name, 're-imaging tasks did not run')
        else:  # canary.result is False
            self._report_results(canary, record)

        record('END FAIL', None, wrapper_job_name)
        return False


    def _ensure_version_label(self, name):
        """
        Ensure that a label called |name| exists in the autotest DB.

        @param name: the label to check for/create.
        """
        if not self._afe.get_labels(name=name):
            self._afe.create_label(name=name)


    def _schedule_reimage_job(self, build, num_machines, board):
        """
        Schedules the reimaging of |num_machines| |board| devices with |build|.

        Sends an RPC to the autotest frontend to enqueue reimaging jobs on
        |num_machines| devices of type |board|.  The job runs the 'autoupdate'
        control file with |image_url| and |image_name| injected at the top.

        @param build: the build to install (must be unique).
        @param num_machines: how many devices to reimage.
        @param board: which kind of devices to reimage.
        @return a frontend.Job object for the reimaging job we scheduled.
        """
        injected = {'image_url': _image_url_pattern() % build,
                    'image_name': build}
        control_file = inject_vars(
            injected,
            self._cf_getter.get_control_file_contents_by_name('autoupdate'))

        return self._afe.create_job(control_file=control_file,
                                    name=build + '-try',
                                    control_type='Server',
                                    meta_hosts=[board] * num_machines)


    def _report_results(self, job, record):
        """
        Record results from a completed frontend.Job object.

        A job whose |result| is True gets a single GOOD entry under the job's
        name.  Otherwise one entry is recorded per host (or per failed test on
        a host), keyed off the per-platform status buckets of the job.

        @param job: a completed frontend.Job object populated by
               frontend.AFE.poll_job_results.
        @param record: callable that records job status.
                 prototype:
                   record(status, subdir, name, reason)
        """
        # Identity check, consistent with how attempt() inspects the result.
        if job.result is True:
            record('GOOD', None, job.name)
            return

        for platform in job.results_platform_map:
            hosts_by_status = job.results_platform_map[platform]
            for status in hosts_by_status:
                # 'Total' is skipped; only the other buckets are reported.
                if status == 'Total':
                    continue
                for host in hosts_by_status[status]:
                    if host not in job.test_status:
                        record('ERROR', None, host, 'Job failed to run.')
                    elif status == 'Failed':
                        for test_status in job.test_status[host].fail:
                            record('FAIL', None, host, test_status.reason)
                    elif status == 'Aborted':
                        for test_status in job.test_status[host].fail:
                            record('ABORT', None, host, test_status.reason)
                    elif status == 'Completed':
                        record('GOOD', None, host)
180
181
class Suite(object):
    """
    A suite of tests, defined by some predicate over control file variables.

    Given a place to search for control files and a predicate to match the
    desired tests, can gather tests and fire off jobs to run them, and then
    wait for results.

    @var _predicate: a function that should return True when run over a
         ControlData representation of a control file that should be in
         this Suite.
    @var _tag: a string with which to tag jobs run in this suite.
    @var _afe: an instance of AFE as defined in server/frontend.py.
    @var _tko: an instance of TKO as defined in server/frontend.py.
    @var _jobs: currently scheduled jobs, if any.
    @var _cf_getter: a control_file_getter.ControlFileGetter
    @var _tests: the ControlData objects that matched |_predicate|.
    """


    @staticmethod
    def create_fs_getter(autotest_dir):
        """
        @param autotest_dir: the place to find autotests.
        @return a FileSystemGetter instance that looks under |autotest_dir|.
        """
        # currently hard-coded places to look for tests.
        subpaths = ['server/site_tests', 'client/site_tests']
        directories = [os.path.join(autotest_dir, p) for p in subpaths]
        return control_file_getter.FileSystemGetter(directories)


    @staticmethod
    def create_from_name(name, autotest_dir, afe=None, tko=None):
        """
        Create a Suite using a predicate based on the SUITE control file var.

        Makes a predicate based on |name| and uses it to instantiate a Suite
        that looks for tests in |autotest_dir| and will schedule them using
        |afe|.  Results will be pulled from |tko| upon completion.

        @param name: a value of the SUITE control file variable to search for.
        @param autotest_dir: the place to find autotests.
        @param afe: an instance of AFE as defined in server/frontend.py.
        @param tko: an instance of TKO as defined in server/frontend.py.
        @return a Suite instance.
        """
        return Suite(lambda t: hasattr(t, 'suite') and t.suite == name,
                     name, autotest_dir, afe, tko)


    def __init__(self, predicate, tag, autotest_dir, afe=None, tko=None):
        """
        Constructor

        Besides storing its arguments, this scans |autotest_dir| right away
        and caches every test (experimental ones included) that satisfies
        |predicate|.

        @param predicate: a function that should return True when run over a
               ControlData representation of a control file that should be in
               this Suite.
        @param tag: a string with which to tag jobs run in this suite.
        @param autotest_dir: the place to find autotests.
        @param afe: an instance of AFE as defined in server/frontend.py.
                    If omitted, a RetryingAFE is created.
        @param tko: an instance of TKO as defined in server/frontend.py.
                    If omitted, a RetryingTKO is created.
        """
        self._predicate = predicate
        self._tag = tag
        self._afe = afe or frontend_wrappers.RetryingAFE(timeout_min=30,
                                                         delay_sec=10,
                                                         debug=False)
        self._tko = tko or frontend_wrappers.RetryingTKO(timeout_min=30,
                                                         delay_sec=10,
                                                         debug=False)
        self._jobs = []

        self._cf_getter = Suite.create_fs_getter(autotest_dir)

        self._tests = Suite.find_and_parse_tests(self._cf_getter,
                                                 self._predicate,
                                                 add_experimental=True)


    @property
    def tests(self):
        """
        A list of ControlData objects in the suite, with added |text| attr.
        """
        return self._tests


    def stable_tests(self):
        """
        |self.tests|, filtered for non-experimental tests.

        @return a list of the tests whose |experimental| attribute is falsy.
        """
        return [t for t in self.tests if not t.experimental]


    def unstable_tests(self):
        """
        |self.tests|, filtered for experimental tests.

        @return a list of the tests whose |experimental| attribute is truthy.
        """
        return [t for t in self.tests if t.experimental]


    def _create_job(self, test, image_name):
        """
        Thin wrapper around frontend.AFE.create_job().

        The job is named <image_name>/<tag>/<test name> and is scheduled
        against a meta-host identified by the version label for |image_name|.

        @param test: ControlData object for a test to run.
        @param image_name: the name of an image against which to test.
        @return frontend.Job object for the job just scheduled.
        """
        return self._afe.create_job(
            control_file=test.text,
            name='/'.join([image_name, self._tag, test.name]),
            control_type=test.test_type.capitalize(),
            meta_hosts=[VERSION_PREFIX + image_name])


    def run_and_wait(self, image_name, record, add_experimental=True):
        """
        Synchronously run tests in |self.tests|.

        Schedules tests against a device running image |image_name|, and
        then polls for status, using |record| to print status when each
        completes.

        Tests returned by self.stable_tests() will always be run, while tests
        in self.unstable_tests() will only be run if |add_experimental| is
        true.

        Exceptions are not propagated: they are logged and turned into a FAIL
        record for the suite.

        @param image_name: the name of an image against which to test.
        @param record: callable that records job status.
                 prototype:
                   record(status, subdir, name, reason)
        @param add_experimental: schedule experimental tests as well, or not.
        """
        try:
            record('INFO', None, 'Start %s' % self._tag)
            self.schedule(image_name, add_experimental)
            try:
                for result in self.wait_for_results():
                    # |result| will be a tuple of a maximum of 4 entries and a
                    # minimum of 3.  We use the first 3 for START and END
                    # entries so we separate those variables out for legible
                    # variable names, nothing more.
                    status = result[0]
                    test_name = result[2]
                    record('START', None, test_name)
                    record(*result)
                    record('END %s' % status, None, test_name)
            except Exception as e:
                # logging.exception keeps the traceback alongside the message.
                logging.exception(e)
                record('FAIL', None, self._tag,
                       'Exception waiting for results')
        except Exception as e:
            logging.exception(e)
            record('FAIL', None, self._tag,
                   'Exception while scheduling suite')


    def schedule(self, image_name, add_experimental=True):
        """
        Schedule jobs using |self._afe|.

        frontend.Job objects representing each scheduled job will be put in
        |self._jobs|.  Stable tests are scheduled first, then (optionally) the
        experimental ones.

        @param image_name: the name of an image against which to test.
        @param add_experimental: schedule experimental tests as well, or not.
        """
        to_schedule = list(self.stable_tests())
        if add_experimental:
            # TODO(cmasone): ensure I can log results from these differently.
            to_schedule.extend(self.unstable_tests())

        for test in to_schedule:
            logging.debug('Scheduling %s', test.name)
            self._jobs.append(self._create_job(test, image_name))


    def _status_is_relevant(self, status):
        """
        Indicates whether the status of a given test is meaningful or not.

        Entries whose name starts with SERVER_JOB or CLIENT_JOB are filtered
        out.

        @param status: frontend.TestStatus object to look at.
        @return True if this is a test result worth looking at further.
        """
        return not status.test_name.startswith(('SERVER_JOB', 'CLIENT_JOB'))


    def _collate_aborted(self, current_value, entry):
        """
        reduce() over a list of HostQueueEntries for a job; True if any aborted.

        Functor that can be reduced()ed over a list of
        HostQueueEntries for a job.  If any were aborted
        (|entry.aborted| exists and is True), then the reduce() will
        return True.

        Ex:
          entries = self._afe.run('get_host_queue_entries', job=job.id)
          reduce(self._collate_aborted, entries, False)

        @param current_value: the current accumulator (a boolean).
        @param entry: the current entry under consideration.
        @return |current_value| if it is already true; otherwise the value of
                |entry.aborted| if it exists, False if not.
        """
        return current_value or ('aborted' in entry and entry['aborted'])


    def wait_for_results(self, poll_interval_sec=5):
        """
        Wait for results of all tests in all jobs in |self._jobs|.

        This is a generator: results are yielded as each job finishes, and
        finished jobs are removed from |self._jobs|.  Between polling passes
        it sleeps for |poll_interval_sec|, but only while jobs are still
        outstanding, so there is no trailing sleep after the last job.

        An aborted job yields a single 3-tuple; otherwise one 4-tuple is
        yielded per relevant test status of the job.

        @param poll_interval_sec: seconds to sleep between polling passes.
        @return a generator of tuples, one per test:
                (status, subdir, name, reason)
        """
        while self._jobs:
            # Iterate over a copy; finished jobs are removed as we go.
            for job in list(self._jobs):
                if not self._afe.get_jobs(id=job.id, finished=True):
                    continue

                self._jobs.remove(job)

                entries = self._afe.run('get_host_queue_entries', job=job.id)
                if any(self._collate_aborted(False, entry)
                       for entry in entries):
                    yield ('ABORT', None, job.name)
                else:
                    statuses = self._tko.get_status_counts(job=job.id)
                    for s in statuses:
                        if self._status_is_relevant(s):
                            yield (s.status, None, s.test_name, s.reason)

            if self._jobs:
                time.sleep(poll_interval_sec)


    @staticmethod
    def find_and_parse_tests(cf_getter, predicate, add_experimental=False):
        """
        Function to scan through all tests and find eligible tests.

        Looks at control files returned by cf_getter.get_control_file_list()
        for tests that pass predicate().  Control files that cannot be parsed
        are logged and skipped rather than aborting the scan.

        @param cf_getter: a control_file_getter.ControlFileGetter used to list
               and fetch the content of control files
        @param predicate: a function that should return True when run over a
               ControlData representation of a control file that should be in
               this Suite.
        @param add_experimental: add tests with experimental attribute set.

        @return list of ControlData objects that should be run, with control
                file text added in |text| attribute.
        """
        tests = {}
        for path in cf_getter.get_control_file_list():
            text = cf_getter.get_control_file_contents(path)
            try:
                found_test = control_data.parse_control_string(
                    text, raise_warnings=True)
                if not add_experimental and found_test.experimental:
                    continue

                found_test.text = text
                tests[path] = found_test
            except control_data.ControlVariableException as e:
                logging.warning("Skipping %s\n%s", path, e)
            except Exception as e:
                logging.error("Bad %s\n%s", path, e)

        return [test for test in tests.values() if predicate(test)]
450 return [test for test in tests.itervalues() if predicate(test)]