blob: e14054dc4299436cbfa17233a6ef8b45c0e08915 [file] [log] [blame]
Chris Masone6fed6462011-10-20 16:36:43 -07001# Copyright (c) 2011 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
4
5import common
6import compiler, logging, os, random, re, time
Chris Masone2ef1d4e2011-12-20 11:06:53 -08007from autotest_lib.client.common_lib import control_data, global_config, error
8from autotest_lib.client.common_lib import utils
Chris Masone6fed6462011-10-20 16:36:43 -07009from autotest_lib.server.cros import control_file_getter
10from autotest_lib.server import frontend
11
12
# Prefix prepended to an image name to build the label used for that image
# (see Reimager._ensure_version_label and Suite._create_job).
VERSION_PREFIX = 'cros-version-'
# Global autotest configuration object; the 'CROS' section is read from it
# by the *_url_pattern() helpers in this file.
CONFIG = global_config.global_config
15
16
def inject_vars(vars, control_file_in):
    """
    Inject the contents of |vars| into |control_file_in|

    Every entry of |vars| becomes one "key='value'" assignment line, and all
    of those lines are placed ahead of the control file text.  Values are
    formatted with %s and emitted as single-quoted string literals;
    backslashes, single quotes and line breaks inside a value are escaped so
    that the generated line is still a valid assignment.

    @param vars: a dict to shoehorn into the provided control file string.
    @param control_file_in: the contents of a control file to munge.
    @return the modified control file string.
    """
    assignments = []
    # items() rather than iteritems() so this works on Python 2 and 3 alike.
    for key, value in vars.items():
        # (value,) keeps tuple values from being unpacked by the % operator.
        text = '%s' % (value,)
        # Backslashes must be escaped first, otherwise the escapes added for
        # the other characters would themselves be doubled.
        text = text.replace('\\', '\\\\')
        text = text.replace("'", "\\'")
        text = text.replace('\n', '\\n').replace('\r', '\\r')
        assignments.append("%s='%s'\n" % (key, text))
    return ''.join(assignments) + control_file_in
29
30
def _image_url_pattern():
    """
    Look up the image URL pattern in the global config.

    @return the 'image_url_pattern' value of the 'CROS' section, as a str.
    """
    pattern = CONFIG.get_config_value('CROS', 'image_url_pattern', type=str)
    return pattern
33
34
def _package_url_pattern():
    """
    Look up the package URL pattern in the global config.

    @return the 'package_url_pattern' value of the 'CROS' section, as a str.
    """
    pattern = CONFIG.get_config_value('CROS', 'package_url_pattern', type=str)
    return pattern
37
Chris Masone6fed6462011-10-20 16:36:43 -070038
class Reimager(object):
    """
    A class that can run jobs to reimage devices.

    @var _afe: a frontend.AFE instance used to talk to autotest.
    @var _tko: a frontend.TKO instance used to query the autotest results db.
    @var _cf_getter: a ControlFileGetter used to get the AU control file.
    """


    def __init__(self, autotest_dir, afe=None, tko=None):
        """
        Constructor

        @param autotest_dir: the place to find autotests.
        @param afe: an instance of AFE as defined in server/frontend.py.
        @param tko: an instance of TKO as defined in server/frontend.py.
        """
        self._afe = afe or frontend.AFE(debug=False)
        self._tko = tko or frontend.TKO(debug=False)
        self._cf_getter = control_file_getter.FileSystemGetter(
            [os.path.join(autotest_dir, 'server/site_tests')])


    def skip(self, g):
        """
        Report whether |g| requests that re-imaging be skipped.

        @param g: a container supporting `in` and item lookup (e.g. a dict).
        @return False if 'SKIP_IMAGE' is not in |g|; otherwise the value
                stored under 'SKIP_IMAGE'.
        """
        return 'SKIP_IMAGE' in g and g['SKIP_IMAGE']


    def attempt(self, name, num, board, record):
        """
        Synchronously attempt to reimage some machines.

        Fire off attempts to reimage |num| machines of type |board|, using
        the image called |name|.  Wait for completion, polling every 10s,
        and log results with |record| upon completion.

        @param name: the name of the image to install (must be unique).
        @param num: how many devices to reimage.
        @param board: which kind of devices to reimage.
        @param record: callable that records job status.
                 prototype:
                   record(status, subdir, name, reason)
        @return True if all reimaging jobs succeed, false otherwise.
        """
        record('START', None, 'try new image')
        self._ensure_version_label(VERSION_PREFIX + name)
        canary = self._schedule_reimage_job(name, num, board)
        logging.debug('Created re-imaging job: %d', canary.id)

        # Phase 1: wait until the job is no longer reported as not-yet-run.
        while self._afe.get_jobs(id=canary.id, not_yet_run=True):
            time.sleep(10)
        logging.debug('Re-imaging job running.')

        # Phase 2: wait until the job is reported as finished.
        while not self._afe.get_jobs(id=canary.id, finished=True):
            time.sleep(10)
        logging.debug('Re-imaging job finished.')

        canary.result = self._afe.poll_job_results(self._tko, canary, 0)

        if canary.result is None:
            # No result at all: there is nothing per-host to report.
            record('FAIL', None, canary.name, 're-imaging tasks did not run')
        else:
            self._report_results(canary, record)

        succeeded = canary.result is True
        record('END GOOD' if succeeded else 'END FAIL', None, None)
        return succeeded


    def _ensure_version_label(self, name):
        """
        Ensure that a label called |name| exists in the autotest DB.

        @param name: the label to check for/create.
        """
        if not self._afe.get_labels(name=name):
            self._afe.create_label(name=name)


    def _schedule_reimage_job(self, name, num_machines, board):
        """
        Schedules the reimaging of |num_machines| |board| devices with |name|.

        Sends an RPC to the autotest frontend to enqueue reimaging jobs on
        |num_machines| devices of type |board|

        @param name: the name of the image to install (must be unique).
        @param num_machines: how many devices to reimage.
        @param board: which kind of devices to reimage.
        @return a frontend.Job object for the reimaging job we scheduled.
        """
        # Prepend the image URL and name to the 'autoupdate' control file.
        control_file = inject_vars(
            {'image_url': _image_url_pattern() % name,
             'image_name': name},
            self._cf_getter.get_control_file_contents_by_name('autoupdate'))

        return self._afe.create_job(control_file=control_file,
                                    name=name + '-try',
                                    control_type='Server',
                                    meta_hosts=[board] * num_machines)


    def _report_results(self, job, record):
        """
        Record results from a completed frontend.Job object.

        If |job.result| is True a single GOOD line is recorded for the job.
        Otherwise one or more lines are recorded per host, based on the
        status bucket the host appears under in |job.results_platform_map|.

        @param job: a completed frontend.Job object populated by
               frontend.AFE.poll_job_results.
        @param record: callable that records job status.
                 prototype:
                   record(status, subdir, name, reason)
        """
        # Identity check, matching the test attempt() applies to the result.
        if job.result is True:
            record('GOOD', None, job.name)
            return

        for statuses in job.results_platform_map.values():
            for status, hosts in statuses.items():
                # 'Total' is skipped; only per-status buckets are reported.
                if status == 'Total':
                    continue
                for host in hosts:
                    if host not in job.test_status:
                        record('ERROR', None, host, 'Job failed to run.')
                    elif status == 'Failed':
                        for test_status in job.test_status[host].fail:
                            record('FAIL', None, host, test_status.reason)
                    elif status == 'Aborted':
                        for test_status in job.test_status[host].fail:
                            record('ABORT', None, host, test_status.reason)
                    elif status == 'Completed':
                        record('GOOD', None, host)
172
173
class Suite(object):
    """
    A suite of tests, defined by some predicate over control file variables.

    Given a place to search for control files and a predicate to match the
    desired tests, can gather tests and fire off jobs to run them, and then
    wait for results.

    @var _predicate: a function that should return True when run over a
         ControlData representation of a control file that should be in
         this Suite.
    @var _tag: a string with which to tag jobs run in this suite.
    @var _afe: an instance of AFE as defined in server/frontend.py.
    @var _tko: an instance of TKO as defined in server/frontend.py.
    @var _jobs: currently scheduled jobs, if any.
    @var _cf_getter: a control_file_getter.ControlFileGetter
    """


    @staticmethod
    def create_fs_getter(autotest_dir):
        """
        @param autotest_dir: the place to find autotests.
        @return a FileSystemGetter instance that looks under |autotest_dir|.
        """
        # currently hard-coded places to look for tests.
        subpaths = ['server/site_tests', 'client/site_tests']
        directories = [os.path.join(autotest_dir, p) for p in subpaths]
        return control_file_getter.FileSystemGetter(directories)


    @staticmethod
    def create_from_name(name, autotest_dir, afe=None, tko=None):
        """
        Create a Suite using a predicate based on the SUITE control file var.

        Makes a predicate based on |name| and uses it to instantiate a Suite
        that looks for tests in |autotest_dir| and will schedule them using
        |afe|.  Results will be pulled from |tko| upon completion

        @param name: a value of the SUITE control file variable to search for.
        @param autotest_dir: the place to find autotests.
        @param afe: an instance of AFE as defined in server/frontend.py.
        @param tko: an instance of TKO as defined in server/frontend.py.
        @return a Suite instance.
        """
        # |name| doubles as the predicate's target value and the suite tag.
        return Suite(lambda t: hasattr(t, 'suite') and t.suite == name,
                     name, autotest_dir, afe, tko)


    def __init__(self, predicate, tag, autotest_dir, afe=None, tko=None):
        """
        Constructor

        @param predicate: a function that should return True when run over a
               ControlData representation of a control file that should be in
               this Suite.
        @param tag: a string with which to tag jobs run in this suite.
        @param autotest_dir: the place to find autotests.
        @param afe: an instance of AFE as defined in server/frontend.py.
        @param tko: an instance of TKO as defined in server/frontend.py.
        """
        self._predicate = predicate
        self._tag = tag
        self._afe = afe or frontend.AFE(debug=False)
        self._tko = tko or frontend.TKO(debug=False)
        self._jobs = []

        self._cf_getter = Suite.create_fs_getter(autotest_dir)

        # Experimental tests are always gathered here; whether they get
        # scheduled is decided later by schedule()/run_and_wait().
        self._tests = Suite.find_and_parse_tests(self._cf_getter,
                                                 self._predicate,
                                                 add_experimental=True)


    @property
    def tests(self):
        """
        A list of ControlData objects in the suite, with added |text| attr.
        """
        return self._tests


    def stable_tests(self):
        """
        |self.tests|, filtered for non-experimental tests.

        @return a list of the tests whose |experimental| attribute is false.
        """
        return [t for t in self.tests if not t.experimental]


    def unstable_tests(self):
        """
        |self.tests|, filtered for experimental tests.

        @return a list of the tests whose |experimental| attribute is true.
        """
        return [t for t in self.tests if t.experimental]


    def _create_job(self, test, image_name):
        """
        Thin wrapper around frontend.AFE.create_job().

        @param test: ControlData object for a test to run.
        @param image_name: the name of an image against which to test.
        @return frontend.Job object for the job just scheduled.
        """
        return self._afe.create_job(
            control_file=test.text,
            name='/'.join([image_name, self._tag, test.name]),
            control_type=test.test_type.capitalize(),
            meta_hosts=[VERSION_PREFIX + image_name])


    def run_and_wait(self, image_name, record, add_experimental=True):
        """
        Synchronously run tests in |self.tests|.

        Schedules tests against a device running image |image_name|, and
        then polls for status, using |record| to print status when each
        completes.

        Tests returned by self.stable_tests() will always be run, while tests
        in self.unstable_tests() will only be run if |add_experimental| is
        true.

        Exceptions are not propagated: they are logged and reported through
        |record| as an 'END ERROR' line.

        @param image_name: the name of an image against which to test.
        @param record: callable that records job status.
                 prototype:
                   record(status, subdir, name, reason)
        @param add_experimental: schedule experimental tests as well, or not.
        """
        try:
            record('START', None, self._tag)
            self.schedule(image_name, add_experimental)
            try:
                for result in self.wait_for_results():
                    record(*result)
                record('END GOOD', None, None)
            except Exception as e:
                # logging.exception keeps the traceback alongside the message.
                logging.exception(e)
                record('END ERROR', None, None,
                       'Exception waiting for results')
        except Exception as e:
            logging.exception(e)
            record('END ERROR', None, None,
                   'Exception while scheduling suite')


    def schedule(self, image_name, add_experimental=True):
        """
        Schedule jobs using |self._afe|.

        frontend.Job objects representing each scheduled job will be put in
        |self._jobs|.  Stable tests are scheduled before experimental ones.

        @param image_name: the name of an image against which to test.
        @param add_experimental: schedule experimental tests as well, or not.
        """
        to_run = list(self.stable_tests())
        if add_experimental:
            # TODO(cmasone): ensure I can log results from these differently.
            to_run.extend(self.unstable_tests())

        for test in to_run:
            logging.debug('Scheduling %s', test.name)
            self._jobs.append(self._create_job(test, image_name))


    def _status_is_relevant(self, status):
        """
        Indicates whether the status of a given test is meaningful or not.

        @param status: frontend.TestStatus object to look at.
        @return True if this is a test result worth looking at further.
        """
        # startswith() accepts a tuple: one call covers both prefixes.
        return not status.test_name.startswith(('SERVER_JOB', 'CLIENT_JOB'))


    def _collate_aborted(self, current_value, entry):
        """
        reduce() over a list of HostQueueEntries for a job; True if any aborted.

        Functor that can be reduced()ed over a list of
        HostQueueEntries for a job.  If any were aborted
        (|entry.aborted| exists and is True), then the reduce() will
        return True.

        Ex:
            entries = self._afe.run('get_host_queue_entries', job=job.id)
            reduce(self._collate_aborted, entries, False)

        @param current_value: the current accumulator (a boolean).
        @param entry: the current entry under consideration.
        @return |current_value| if it is true; otherwise the value of
                |entry['aborted']| if it exists, False if not.
        """
        return current_value or ('aborted' in entry and entry['aborted'])


    def wait_for_results(self):
        """
        Wait for results of all tests in all jobs in |self._jobs|.

        Polls every 5s for as long as unfinished jobs remain; finished jobs
        are removed from |self._jobs| as their results are collected.

        @return a list of tuples: ('ABORT', None, job_name) for each job
                with an aborted entry, and (status, subdir, name, reason) for
                each relevant test of every other job.
        """
        results = []
        while self._jobs:
            # Iterate over a copy because finished jobs are removed below.
            for job in list(self._jobs):
                if not self._afe.get_jobs(id=job.id, finished=True):
                    continue

                self._jobs.remove(job)

                entries = self._afe.run('get_host_queue_entries', job=job.id)
                # any() replaces the Python-2-only bare reduce() builtin.
                if any(self._collate_aborted(False, e) for e in entries):
                    results.append(('ABORT', None, job.name))
                else:
                    statuses = self._tko.get_status_counts(job=job.id)
                    for s in statuses:
                        if self._status_is_relevant(s):
                            results.append(
                                (s.status, None, s.test_name, s.reason))
            # No point in sleeping once every job has been collected.
            if self._jobs:
                time.sleep(5)

        return results


    @staticmethod
    def find_and_parse_tests(cf_getter, predicate, add_experimental=False):
        """
        Function to scan through all tests and find eligible tests.

        Looks at control files returned by cf_getter.get_control_file_list()
        for tests that pass predicate().  Control files that fail to parse
        are logged and skipped.

        @param cf_getter: a control_file_getter.ControlFileGetter used to list
               and fetch the content of control files
        @param predicate: a function that should return True when run over a
               ControlData representation of a control file that should be in
               this Suite.
        @param add_experimental: add tests with experimental attribute set.

        @return list of ControlData objects that should be run, with control
                file text added in |text| attribute.
        """
        tests = {}
        for path in cf_getter.get_control_file_list():
            text = cf_getter.get_control_file_contents(path)
            try:
                found_test = control_data.parse_control_string(
                    text, raise_warnings=True)
                if not add_experimental and found_test.experimental:
                    continue

                found_test.text = text
                tests[path] = found_test
            except control_data.ControlVariableException as e:
                logging.warning("Skipping %s\n%s", path, e)
            except Exception as e:
                logging.error("Bad %s\n%s", path, e)

        return [test for test in tests.values() if predicate(test)]
432 return [test for test in tests.itervalues() if predicate(test)]