blob: d184e689dd8f6acc152e3858550b917e93b7cc7b [file] [log] [blame]
Chris Masone8ac66712012-02-15 14:21:02 -08001# Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
Chris Masone6fed6462011-10-20 16:36:43 -07002# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5import common
6import compiler, logging, os, random, re, time
Chris Masone2ef1d4e2011-12-20 11:06:53 -08007from autotest_lib.client.common_lib import control_data, global_config, error
8from autotest_lib.client.common_lib import utils
Chris Masone8b7cd422012-02-22 13:16:11 -08009from autotest_lib.client.common_lib.cros import dev_server
Chris Masone8ac66712012-02-15 14:21:02 -080010from autotest_lib.server.cros import control_file_getter, frontend_wrappers
Chris Masone6fed6462011-10-20 16:36:43 -070011from autotest_lib.server import frontend
12
13
Scott Zawalski65650172012-02-16 11:48:26 -050014VERSION_PREFIX = 'cros-version:'
Chris Masone2ef1d4e2011-12-20 11:06:53 -080015CONFIG = global_config.global_config
16
17
def inject_vars(vars, control_file_in):
    """
    Inject the contents of |vars| into |control_file_in|

    Every entry of |vars| becomes one `key=<string literal>` assignment line
    placed ahead of the original control file text.  Each value is converted
    to a string and emitted with repr(), so quotes, backslashes and newlines
    inside a value are escaped instead of producing a broken assignment.  For
    ordinary values the output is the same `key='value'` line as before.

    @param vars: a dict to shoehorn into the provided control file string.
    @param control_file_in: the contents of a control file to munge.
    @return the modified control file string.
    """
    # '%s' % (value,) stringifies exactly like the old format did (the
    # one-element tuple keeps tuple values from being unpacked); repr() then
    # picks a safe quoting for the resulting string.
    assignments = ['%s=%s\n' % (key, repr('%s' % (value,)))
                   for key, value in vars.items()]
    return ''.join(assignments) + control_file_in
30
31
def _image_url_pattern():
    """
    Look up the image URL pattern in the global config.

    @return the string value of 'image_url_pattern' in the CROS section.
    """
    pattern = CONFIG.get_config_value('CROS', 'image_url_pattern', type=str)
    return pattern
34
35
def _package_url_pattern():
    """
    Look up the package URL pattern in the global config.

    @return the string value of 'package_url_pattern' in the CROS section.
    """
    pattern = CONFIG.get_config_value('CROS', 'package_url_pattern', type=str)
    return pattern
38
Chris Masone6fed6462011-10-20 16:36:43 -070039
class Reimager(object):
    """
    Schedules and monitors autotest jobs that reimage devices.

    @var _afe: a frontend.AFE instance used to talk to autotest.
    @var _tko: a frontend.TKO instance used to query the autotest results db.
    @var _pool: the pool of machines to schedule in, if any.
    @var _cf_getter: a ControlFileGetter used to get the AU control file.
    """


    def __init__(self, autotest_dir, afe=None, tko=None, pool=None):
        """
        Constructor

        @param autotest_dir: the place to find autotests.
        @param afe: an instance of AFE as defined in server/frontend.py.
                    A RetryingAFE is created when none is supplied.
        @param tko: an instance of TKO as defined in server/frontend.py.
                    A RetryingTKO is created when none is supplied.
        @param pool: Specify the pool of machines to use for scheduling
                     purposes.
        """
        if not afe:
            afe = frontend_wrappers.RetryingAFE(timeout_min=30,
                                                delay_sec=10,
                                                debug=False)
        if not tko:
            tko = frontend_wrappers.RetryingTKO(timeout_min=30,
                                                delay_sec=10,
                                                debug=False)
        self._afe = afe
        self._tko = tko
        self._pool = pool
        site_tests_dir = os.path.join(autotest_dir, 'server/site_tests')
        self._cf_getter = control_file_getter.FileSystemGetter(
            [site_tests_dir])


    def skip(self, g):
        """
        Report the SKIP_IMAGE setting found in |g|.

        @param g: a mapping that may contain a 'SKIP_IMAGE' key.
        @return the value stored under 'SKIP_IMAGE', or False if the key is
                absent.
        """
        return g['SKIP_IMAGE'] if 'SKIP_IMAGE' in g else False


    def attempt(self, build, board, record, num=None, pool=None):
        """
        Synchronously attempt to reimage some machines.

        Schedules a single job that reimages |num| machines of type |board|
        with |build|, polls every 10s until that job has started and then
        until it has finished, and logs the outcome with |record|.  The
        whole attempt is bracketed by START / END records named
        'try new image'.

        @param build: the build to install e.g.
                      x86-alex-release/R18-1655.0.0-a1-b1584.
        @param board: which kind of devices to reimage.
        @param record: callable that records job status.
                 prototype:
                   record(status, subdir, name, reason)
        @param num: how many devices to reimage.  When not given, the
                    'sharding_factor' value of the CROS config section is
                    used.
        @param pool: Specify the pool of machines to use for scheduling
                     purposes.  When given, it replaces the pool stored on
                     this instance.
        @return True if all reimaging jobs succeed, false otherwise.
        """
        if not num:
            num = CONFIG.get_config_value('CROS', 'sharding_factor', type=int)
        if pool:
            self._pool = pool
        logging.debug("scheduling reimaging across %d machines", num)

        wrapper_job_name = 'try new image'
        record('START', None, wrapper_job_name)
        self._ensure_version_label(VERSION_PREFIX + build)
        canary = self._schedule_reimage_job(build, num, board)
        logging.debug('Created re-imaging job: %d', canary.id)

        # First wait for the job to leave the queue, then for it to finish.
        while self._afe.get_jobs(id=canary.id, not_yet_run=True):
            time.sleep(10)
        logging.debug('Re-imaging job running.')
        while not self._afe.get_jobs(id=canary.id, finished=True):
            time.sleep(10)
        logging.debug('Re-imaging job finished.')
        canary.result = self._afe.poll_job_results(self._tko, canary, 0)

        succeeded = canary.result is True
        if canary.result is None:
            record('FAIL', None, canary.name, 're-imaging tasks did not run')
        else:
            # Both the all-good and the failed case have per-host details.
            self._report_results(canary, record)

        record('END GOOD' if succeeded else 'END FAIL', None,
               wrapper_job_name)
        return succeeded


    def _ensure_version_label(self, name):
        """
        Make sure the autotest DB has a label called |name|.

        The label is created only when a lookup by name finds nothing.

        @param name: the label to check for/create.
        """
        if not self._afe.get_labels(name=name):
            self._afe.create_label(name=name)


    def _schedule_reimage_job(self, build, num_machines, board):
        """
        Schedules the reimaging of |num_machines| |board| devices with |build|.

        Sends an RPC to the autotest frontend to enqueue one 'autoupdate'
        server job across |num_machines| devices of type |board|.  When a
        pool is set, machines are picked by pool label and the board label
        becomes a job dependency; otherwise machines are picked by board
        label alone.

        @param build: the build to install (must be unique).
        @param num_machines: how many devices to reimage.
        @param board: which kind of devices to reimage.
        @return a frontend.Job object for the reimaging job we scheduled.
        """
        image_vars = {'image_url': _image_url_pattern() % build,
                      'image_name': build}
        au_control = self._cf_getter.get_control_file_contents_by_name(
            'autoupdate')
        control_file = inject_vars(image_vars, au_control)

        board_label = 'board:%s' % board
        if self._pool:
            meta_host = 'pool:%s' % self._pool
            job_deps = [board_label]
        else:
            # No pool specified use board.
            meta_host = board_label
            job_deps = []

        return self._afe.create_job(control_file=control_file,
                                    name=build + '-try',
                                    control_type='Server',
                                    meta_hosts=[meta_host] * num_machines,
                                    dependencies=job_deps)


    def _report_results(self, job, record):
        """
        Record results from a completed frontend.Job object.

        A job whose result is True gets a single GOOD record under the job
        name.  Otherwise every host listed in |job.results_platform_map|
        (ignoring the 'Total' bucket) gets its own record(s): ERROR if the
        host has no entry in |job.test_status|, one FAIL or ABORT per failed
        test status for 'Failed' / 'Aborted' hosts, and GOOD for 'Completed'
        hosts.

        @param job: a completed frontend.Job object populated by
               frontend.AFE.poll_job_results.
        @param record: callable that records job status.
                 prototype:
                   record(status, subdir, name, reason)
        """
        if job.result == True:
            record('GOOD', None, job.name)
            return

        # Host buckets whose failed test statuses are reported one by one,
        # mapped to the status string used for the record.
        failure_labels = {'Failed': 'FAIL', 'Aborted': 'ABORT'}

        for platform in job.results_platform_map:
            hosts_by_status = job.results_platform_map[platform]
            for status in hosts_by_status:
                if status == 'Total':
                    continue
                for host in hosts_by_status[status]:
                    if host not in job.test_status:
                        record('ERROR', None, host, 'Job failed to run.')
                        continue
                    if status in failure_labels:
                        for test_status in job.test_status[host].fail:
                            record(failure_labels[status], None, host,
                                   test_status.reason)
                    elif status == 'Completed':
                        record('GOOD', None, host)
197
198
class Suite(object):
    """
    A suite of tests, defined by some predicate over control file variables.

    Given a build whose control files can be listed and a predicate to match
    the desired tests, can gather tests and fire off jobs to run them, and
    then wait for results.

    @var _predicate: a function that should return True when run over a
         ControlData representation of a control file that should be in
         this Suite.
    @var _tag: a string with which to tag jobs run in this suite.
    @var _build: the build on which we're running this suite.
    @var _afe: an instance of AFE as defined in server/frontend.py.
    @var _tko: an instance of TKO as defined in server/frontend.py.
    @var _pool: the pool of machines to schedule in, if any.
    @var _jobs: currently scheduled jobs, if any.
    @var _cf_getter: a control_file_getter.ControlFileGetter
    @var _tests: the ControlData objects selected by |_predicate|.
    """


    @staticmethod
    def create_cf_getter(build):
        """
        @param build: the build on which we're running this suite.
        @return a DevServerGetter instance for |build|, backed by the dev
                server returned by dev_server.DevServer.create().
        """
        return control_file_getter.DevServerGetter(
            build, dev_server.DevServer.create())


    @staticmethod
    def create_from_name(name, build, afe=None, tko=None, pool=None):
        """
        Create a Suite using a predicate based on the SUITE control file var.

        Makes a predicate based on |name| and uses it to instantiate a Suite
        that looks for tests belonging to |build| and will schedule them
        using |afe|.  Results will be pulled from |tko| upon completion.

        @param name: a value of the SUITE control file variable to search for.
        @param build: the build on which we're running this suite.
        @param afe: an instance of AFE as defined in server/frontend.py.
        @param tko: an instance of TKO as defined in server/frontend.py.
        @param pool: Specify the pool of machines to use for scheduling
                purposes.
        @return a Suite instance.
        """
        return Suite(lambda t: hasattr(t, 'suite') and t.suite == name,
                     name, build, afe, tko, pool)


    def __init__(self, predicate, tag, build, afe=None, tko=None,
                 pool=None):
        """
        Constructor

        Besides storing its arguments, this creates the control file getter
        for |build| and immediately gathers every matching test (experimental
        ones included) into |self._tests|.

        @param predicate: a function that should return True when run over a
               ControlData representation of a control file that should be in
               this Suite.
        @param tag: a string with which to tag jobs run in this suite.
        @param build: the build on which we're running this suite.
        @param afe: an instance of AFE as defined in server/frontend.py.
                    A RetryingAFE is created when none is supplied.
        @param tko: an instance of TKO as defined in server/frontend.py.
                    A RetryingTKO is created when none is supplied.
        @param pool: Specify the pool of machines to use for scheduling
                purposes.
        """
        self._predicate = predicate
        self._tag = tag
        self._build = build
        self._afe = afe or frontend_wrappers.RetryingAFE(timeout_min=30,
                                                         delay_sec=10,
                                                         debug=False)
        self._tko = tko or frontend_wrappers.RetryingTKO(timeout_min=30,
                                                         delay_sec=10,
                                                         debug=False)
        self._pool = pool
        self._jobs = []

        self._cf_getter = Suite.create_cf_getter(self._build)

        self._tests = Suite.find_and_parse_tests(self._cf_getter,
                                                 self._predicate,
                                                 add_experimental=True)


    @property
    def tests(self):
        """
        A list of ControlData objects in the suite, with added |text| attr.
        """
        return self._tests


    def stable_tests(self):
        """
        |self.tests|, filtered for non-experimental tests.

        @return a list of the tests whose |experimental| attribute is false.
        """
        return [t for t in self.tests if not t.experimental]


    def unstable_tests(self):
        """
        |self.tests|, filtered for experimental tests.

        @return a list of the tests whose |experimental| attribute is true.
        """
        return [t for t in self.tests if t.experimental]


    def _create_job(self, test):
        """
        Thin wrapper around frontend.AFE.create_job().

        With a pool set, machines are picked by pool label and the build's
        version label becomes a job dependency; otherwise machines are picked
        by the version label alone.

        @param test: ControlData object for a test to run.
        @return frontend.Job object for the job just scheduled.
        """
        job_deps = []
        if self._pool:
            meta_hosts = 'pool:%s' % self._pool
            cros_label = VERSION_PREFIX + self._build
            job_deps.append(cros_label)
        else:
            # No pool specified use any machines with the following label.
            meta_hosts = VERSION_PREFIX + self._build

        return self._afe.create_job(
            control_file=test.text,
            name='/'.join([self._build, self._tag, test.name]),
            control_type=test.test_type.capitalize(),
            meta_hosts=[meta_hosts],
            dependencies=job_deps)


    def run_and_wait(self, record, add_experimental=True):
        """
        Synchronously run tests in |self.tests|.

        Schedules tests against a device running image |self._build|, and
        then polls for status, using |record| to print status when each
        completes.

        Tests returned by self.stable_tests() will always be run, while tests
        in self.unstable_tests() will only be run if |add_experimental| is true.

        Exceptions are not propagated: they are logged and turned into a FAIL
        record against |self._tag|.

        @param record: callable that records job status.
                 prototype:
                   record(status, subdir, name, reason)
        @param add_experimental: schedule experimental tests as well, or not.
        """
        try:
            record('INFO', None, 'Start %s' % self._tag)
            self.schedule(add_experimental)
            try:
                for result in self.wait_for_results():
                    # |result| will be a tuple of a maximum of 4 entries and a
                    # minimum of 3. We use the first 3 for START and END
                    # entries so we separate those variables out for legible
                    # variable names, nothing more.
                    status = result[0]
                    test_name = result[2]
                    record('START', None, test_name)
                    record(*result)
                    record('END %s' % status, None, test_name)
            except Exception as e:
                logging.error(e)
                record('FAIL', None, self._tag,
                       'Exception waiting for results')
        except Exception as e:
            logging.error(e)
            record('FAIL', None, self._tag,
                   'Exception while scheduling suite')


    def schedule(self, add_experimental=True):
        """
        Schedule jobs using |self._afe|.

        frontend.Job objects representing each scheduled job will be put in
        |self._jobs|.

        @param add_experimental: schedule experimental tests as well, or not.
        """
        for test in self.stable_tests():
            logging.debug('Scheduling %s', test.name)
            self._jobs.append(self._create_job(test))

        if add_experimental:
            # TODO(cmasone): ensure I can log results from these differently.
            for test in self.unstable_tests():
                logging.debug('Scheduling %s', test.name)
                self._jobs.append(self._create_job(test))


    def _status_is_relevant(self, status):
        """
        Indicates whether the status of a given test is meaningful or not.

        Statuses whose test name starts with SERVER_JOB or CLIENT_JOB are
        not considered relevant.

        @param status: frontend.TestStatus object to look at.
        @return True if this is a test result worth looking at further.
        """
        return not (status.test_name.startswith('SERVER_JOB') or
                    status.test_name.startswith('CLIENT_JOB'))


    def _collate_aborted(self, current_value, entry):
        """
        reduce() over a list of HostQueueEntries for a job; True if any aborted.

        Functor that can be reduced()ed over a list of
        HostQueueEntries for a job.  If any were aborted
        (|entry.aborted| exists and is True), then the reduce() will
        return True.

        Ex:
            entries = self._afe.run('get_host_queue_entries', job=job.id)
            reduce(self._collate_aborted, entries, False)

        @param current_value: the current accumulator (a boolean).
        @param entry: the current entry under consideration.
        @return |current_value| if it is true; otherwise the value of
                |entry['aborted']| if it exists, False if not.
        """
        return current_value or ('aborted' in entry and entry['aborted'])


    def wait_for_results(self):
        """
        Wait for results of all tests in all jobs in |self._jobs|.

        This is a generator.  Finished jobs are removed from |self._jobs| and
        their results yielded as they are found; while unfinished jobs
        remain, polling resumes after a 5s pause.

        @return yields one tuple per aborted job, (status, subdir, name), or
                one tuple per relevant test of a job that was not aborted,
                (status, subdir, name, reason).
        """
        while self._jobs:
            for job in list(self._jobs):
                if not self._afe.get_jobs(id=job.id, finished=True):
                    continue

                self._jobs.remove(job)

                entries = self._afe.run('get_host_queue_entries', job=job.id)
                if any(self._collate_aborted(False, entry)
                       for entry in entries):
                    yield ('ABORT', None, job.name)
                else:
                    statuses = self._tko.get_status_counts(job=job.id)
                    for s in statuses:
                        if self._status_is_relevant(s):
                            yield (s.status, None, s.test_name, s.reason)
            # Only pause when something is still outstanding; sleeping after
            # the last job has been reaped would just delay the caller.
            if self._jobs:
                time.sleep(5)


    @staticmethod
    def find_and_parse_tests(cf_getter, predicate, add_experimental=False):
        """
        Function to scan through all tests and find eligible tests.

        Looks at control files returned by cf_getter.get_control_file_list()
        for tests that pass predicate().  Control files that fail to parse
        are logged and skipped.

        @param cf_getter: a control_file_getter.ControlFileGetter used to list
               and fetch the content of control files
        @param predicate: a function that should return True when run over a
               ControlData representation of a control file that should be in
               this Suite.
        @param add_experimental: add tests with experimental attribute set.

        @return list of ControlData objects that should be run, with control
                file text added in |text| attribute.
        """
        tests = {}
        for path in cf_getter.get_control_file_list():
            text = cf_getter.get_control_file_contents(path)
            try:
                found_test = control_data.parse_control_string(
                    text, raise_warnings=True)
                if not add_experimental and found_test.experimental:
                    continue

                found_test.text = text
                tests[path] = found_test
            except control_data.ControlVariableException as e:
                logging.warning("Skipping %s\n%s", path, e)
            except Exception as e:
                logging.error("Bad %s\n%s", path, e)

        return [test for test in tests.values() if predicate(test)]