blob: 066661d30256b4d1a9c4c7f8403b6a08697b8a5d [file] [log] [blame]
# Copyright (c) 2011 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
4
5import common
6import compiler, logging, os, random, re, time
7from autotest_lib.client.common_lib import control_data, error, utils
8from autotest_lib.server.cros import control_file_getter
9from autotest_lib.server import frontend
10
11
# Prefix prepended to an image name to build the autotest label name that
# Reimager ensures exists and that Suite passes as the meta-host for its jobs.
VERSION_PREFIX = 'cros-version-'
13
class Reimager(object):
    """
    A class that can run jobs to reimage devices.

    @var _afe: a frontend.AFE instance used to talk to autotest.
    @var _tko: a frontend.TKO instance used to query the autotest results db.
    @var _cf_getter: a ControlFileGetter used to get the AU control file.
    """


    def __init__(self, autotest_dir, afe=None, tko=None):
        """
        Constructor

        @param autotest_dir: the place to find autotests.
        @param afe: an instance of AFE as defined in server/frontend.py.
                    A default frontend.AFE is created when omitted.
        @param tko: an instance of TKO as defined in server/frontend.py.
                    A default frontend.TKO is created when omitted.
        """
        self._afe = afe or frontend.AFE(debug=False)
        self._tko = tko or frontend.TKO(debug=False)
        self._cf_getter = control_file_getter.FileSystemGetter(
            [os.path.join(autotest_dir, 'server/site_tests')])


    def attempt(self, url, name, num, board, record):
        """
        Synchronously attempt to reimage some machines.

        Fire off attempts to reimage |num| machines of type |board|, using an
        image at |url| called |name|.  Wait for completion, polling every
        10s, and log results with |record| upon completion.

        @param url: the URL of the image to install.
        @param name: the name of the image to install (must be unique).
        @param num: how many devices to reimage.
        @param board: which kind of devices to reimage.
        @param record: callable that records job status.
                 prototype:
                   record(status, subdir, name, reason)
        @return True if all reimaging jobs succeed, False otherwise.
        """
        record('START', None, 'try new image')
        self._ensure_version_label(VERSION_PREFIX + name)
        canary = self._schedule_reimage_job(url, name, num, board)
        logging.debug('Created re-imaging job: %d', canary.id)

        # Phase 1: wait until the job has left the not-yet-run state.
        while self._afe.get_jobs(id=canary.id, not_yet_run=True):
            time.sleep(10)
        logging.debug('Re-imaging job running.')

        # Phase 2: wait until the job is reported as finished.
        while not self._afe.get_jobs(id=canary.id, finished=True):
            time.sleep(10)
        logging.debug('Re-imaging job finished.')
        canary.result = self._afe.poll_job_results(self._tko, canary, 0)

        if canary.result is True:
            self._report_results(canary, record)
            record('END GOOD', None, None)
            return True

        if canary.result is None:
            record('FAIL', None, canary.name, 're-imaging tasks did not run')
        else:  # canary.result is False
            self._report_results(canary, record)

        record('END FAIL', None, None)
        return False


    def _ensure_version_label(self, name):
        """
        Ensure that a label called |name| exists in the autotest DB.

        @param name: the label to check for/create.
        """
        if not self._afe.get_labels(name=name):
            self._afe.create_label(name=name)


    def _inject_vars(self, vars, control_file_in):
        """
        Inject the contents of |vars| into |control_file_in|

        Each entry is prepended as a |key=<string literal>| assignment line.
        The value is formatted with %s and then emitted via repr(), so that
        quotes or backslashes inside it cannot terminate the literal early
        and corrupt the generated control file.

        @param vars: a dict to shoehorn into the provided control file string.
        @param control_file_in: the contents of a control file to munge.
        @return the modified control file string.
        """
        assignments = ["%s=%r\n" % (key, '%s' % (value,))
                       for key, value in vars.items()]
        return ''.join(assignments) + control_file_in


    def _schedule_reimage_job(self, url, name, num_machines, board):
        """
        Schedules the reimaging of |num_machines| |board| devices with |name|.

        Sends an RPC to the autotest frontend to enqueue reimaging jobs on
        |num_machines| devices of type |board|

        @param url: the URL of the image to install.
        @param name: the name of the image to install (must be unique).
        @param num_machines: how many devices to reimage.
        @param board: which kind of devices to reimage.
        @return a frontend.Job object for the reimaging job we scheduled.
        """
        control_file = self._inject_vars(
            {'image_url': url,
             'image_name': name},
            self._cf_getter.get_control_file_contents_by_name('autoupdate'))

        # One meta-host entry per machine requested.
        dargs = {'control_file': control_file,
                 'name': name + '-try',
                 'control_type': 'Server',
                 'meta_hosts': [board] * num_machines}

        return self._afe.create_job(**dargs)


    def _report_results(self, job, record):
        """
        Record results from a completed frontend.Job object.

        A successful job yields a single GOOD record for the job itself.
        Otherwise one or more records are emitted per host, based on the
        status bucket the host appears under in |job.results_platform_map|.

        @param job: a completed frontend.Job object populated by
               frontend.AFE.poll_job_results.
        @param record: callable that records job status.
               prototype:
                 record(status, subdir, name, reason)
        """
        if job.result is True:
            record('GOOD', None, job.name)
            return

        for platform in job.results_platform_map:
            by_status = job.results_platform_map[platform]
            for status in by_status:
                # 'Total' is skipped; only the per-status buckets are
                # reported.
                if status == 'Total':
                    continue
                for host in by_status[status]:
                    if host not in job.test_status:
                        record('ERROR', None, host, 'Job failed to run.')
                    elif status == 'Failed':
                        for test_status in job.test_status[host].fail:
                            record('FAIL', None, host, test_status.reason)
                    elif status == 'Aborted':
                        for test_status in job.test_status[host].fail:
                            record('ABORT', None, host, test_status.reason)
                    elif status == 'Completed':
                        record('GOOD', None, host)
161
162
class Suite(object):
    """
    A suite of tests, defined by some predicate over control file variables.

    Given a place to search for control files and a predicate to match the
    desired tests, can gather tests and fire off jobs to run them, and then
    wait for results.

    @var _predicate: a function that should return True when run over a
         ControlData representation of a control file that should be in
         this Suite.
    @var _tag: a string with which to tag jobs run in this suite.
    @var _afe: an instance of AFE as defined in server/frontend.py.
    @var _tko: an instance of TKO as defined in server/frontend.py.
    @var _jobs: currently scheduled jobs, if any.
    @var _cf_getter: a control_file_getter.ControlFileGetter
    @var _tests: the ControlData objects found for this suite.
    """


    @classmethod
    def create_from_name(cls, name, autotest_dir, afe=None, tko=None):
        """
        Create a Suite using a predicate based on the SUITE control file var.

        Makes a predicate based on |name| and uses it to instantiate a Suite
        that looks for tests in |autotest_dir| and will schedule them using
        |afe|.  Results will be pulled from |tko| upon completion

        @param name: a value of the SUITE control file variable to search for.
        @param autotest_dir: the place to find autotests.
        @param afe: an instance of AFE as defined in server/frontend.py.
        @param tko: an instance of TKO as defined in server/frontend.py.
        @return a Suite instance.
        """
        # Instantiate through |cls| so subclasses get their own type back.
        return cls(lambda t: hasattr(t, 'suite') and t.suite == name,
                   name, autotest_dir, afe, tko)


    def __init__(self, predicate, tag, autotest_dir, afe=None, tko=None):
        """
        Constructor

        @param predicate: a function that should return True when run over a
               ControlData representation of a control file that should be in
               this Suite.
        @param tag: a string with which to tag jobs run in this suite.
        @param autotest_dir: the place to find autotests.
        @param afe: an instance of AFE as defined in server/frontend.py.
        @param tko: an instance of TKO as defined in server/frontend.py.
        """
        self._predicate = predicate
        self._tag = tag
        self._afe = afe or frontend.AFE(debug=False)
        self._tko = tko or frontend.TKO(debug=False)
        self._jobs = []

        # currently hard-coded places to look for tests.
        subpaths = ['server/site_tests', 'client/site_tests']
        directories = [os.path.join(autotest_dir, p) for p in subpaths]
        self._cf_getter = control_file_getter.FileSystemGetter(directories)

        # Experimental tests are always collected here; whether they are
        # actually scheduled is decided later in schedule().
        self._tests = self.find_and_parse_tests(self._cf_getter,
                                                self._predicate,
                                                add_experimental=True)


    @property
    def tests(self):
        """
        A list of ControlData objects in the suite, with added |text| attr.
        """
        return self._tests


    def stable_tests(self):
        """
        |self.tests|, filtered for non-experimental tests.
        """
        return [t for t in self.tests if not t.experimental]


    def unstable_tests(self):
        """
        |self.tests|, filtered for experimental tests.
        """
        return [t for t in self.tests if t.experimental]


    def _create_job(self, test, image_name):
        """
        Thin wrapper around frontend.AFE.create_job().

        @param test: ControlData object for a test to run.
        @param image_name: the name of an image against which to test.
        @return frontend.Job object for the job just scheduled.
        """
        return self._afe.create_job(
            control_file=test.text,
            name='/'.join([image_name, self._tag, test.name]),
            control_type=test.test_type.capitalize(),
            meta_hosts=[VERSION_PREFIX + image_name])


    def run_and_wait(self, image_name, record, add_experimental=True):
        """
        Synchronously run tests in |self.tests|.

        Schedules tests against a device running image |image_name|, and
        then polls for status, using |record| to print status when each
        completes.

        Tests returned by self.stable_tests() will always be run, while tests
        in self.unstable_tests() will only be run if |add_experimental| is
        true.

        Exceptions are logged (with traceback) and reported through |record|
        as 'END ERROR' rather than propagated to the caller.

        @param image_name: the name of an image against which to test.
        @param record: callable that records job status.
                 prototype:
                   record(status, subdir, name, reason)
        @param add_experimental: schedule experimental tests as well, or not.
        """
        try:
            record('START', None, self._tag)
            self.schedule(image_name, add_experimental)
            try:
                for result in self.wait_for_results():
                    record(*result)
                record('END GOOD', None, None)
            except Exception as e:
                logging.exception(e)
                record('END ERROR', None, None,
                       'Exception waiting for results')
        except Exception as e:
            logging.exception(e)
            record('END ERROR', None, None,
                   'Exception while scheduling suite')


    def schedule(self, image_name, add_experimental=True):
        """
        Schedule jobs using |self._afe|.

        frontend.Job objects representing each scheduled job will be put in
        |self._jobs|.

        @param image_name: the name of an image against which to test.
        @param add_experimental: schedule experimental tests as well, or not.
        """
        to_schedule = list(self.stable_tests())
        if add_experimental:
            # TODO(cmasone): ensure I can log results from these differently.
            to_schedule.extend(self.unstable_tests())

        for test in to_schedule:
            logging.debug('Scheduling %s', test.name)
            self._jobs.append(self._create_job(test, image_name))


    def _status_is_relevant(self, status):
        """
        Indicates whether the status of a given test is meaningful or not.

        @param status: frontend.TestStatus object to look at.
        @return True if this is a test result worth looking at further.
        """
        return not status.test_name.startswith(('SERVER_JOB', 'CLIENT_JOB'))


    def _collate_aborted(self, current_value, entry):
        """
        reduce() over a list of HostQueueEntries for a job; True if any aborted.

        Functor that can be reduced()ed over a list of
        HostQueueEntries for a job.  If any were aborted
        (|entry['aborted']| exists and is True), then the reduce() will
        return True.

        Ex:
            entries = self._afe.run('get_host_queue_entries', job=job.id)
            reduce(self._collate_aborted, entries, False)

        @param current_value: the current accumulator (a boolean).
        @param entry: the current entry under consideration.
        @return |current_value| if it is true; otherwise the value of
                |entry['aborted']| if it exists, False if not.
        """
        return current_value or entry.get('aborted', False)


    def wait_for_results(self):
        """
        Wait for results of all tests in all jobs in |self._jobs|.

        Currently polls for results every 5s.  Finished jobs are removed from
        |self._jobs| as they are processed; once none remain the collected
        results are returned without a further delay.

        @return a list of tuples, one per test: (status, subdir, name, reason)
                Aborted jobs contribute a single ('ABORT', None, job_name)
                tuple instead.
        """
        results = []
        while self._jobs:
            # Iterate over a copy; finished jobs are removed as we go.
            for job in list(self._jobs):
                if not self._afe.get_jobs(id=job.id, finished=True):
                    continue

                self._jobs.remove(job)

                entries = self._afe.run('get_host_queue_entries', job=job.id)
                if any(self._collate_aborted(False, entry)
                       for entry in entries):
                    results.append(('ABORT', None, job.name))
                else:
                    statuses = self._tko.get_status_counts(job=job.id)
                    results.extend(
                        (s.status, None, s.test_name, s.reason)
                        for s in statuses if self._status_is_relevant(s))

            # Only pause when there is still something left to poll for.
            if self._jobs:
                time.sleep(5)

        return results


    @classmethod
    def find_and_parse_tests(cls, cf_getter, predicate, add_experimental=False):
        """
        Function to scan through all tests and find eligible tests.

        Looks at control files returned by cf_getter.get_control_file_list()
        for tests that pass predicate().  Control files that fail to parse
        are logged and skipped.

        @param cf_getter: a control_file_getter.ControlFileGetter used to list
               and fetch the content of control files
        @param predicate: a function that should return True when run over a
               ControlData representation of a control file that should be in
               this Suite.
        @param add_experimental: add tests with experimental attribute set.

        @return list of ControlData objects that should be run, with control
                file text added in |text| attribute.
        """
        tests = {}
        for path in cf_getter.get_control_file_list():
            text = cf_getter.get_control_file_contents(path)
            try:
                found_test = control_data.parse_control_string(
                    text, raise_warnings=True)
                if not add_experimental and found_test.experimental:
                    continue

                found_test.text = text
                tests[path] = found_test
            except control_data.ControlVariableException as e:
                logging.warning("Skipping %s\n%s", path, e)
            except Exception as e:
                logging.error("Bad %s\n%s", path, e)

        return [test for test in tests.values() if predicate(test)]