blob: 1d8c86f268cf5a04c420af0fe016243148fa310e [file] [log] [blame]
mbligh57e78662008-06-17 19:53:49 +00001"""
2The main job wrapper for the server side.
3
4This is the core infrastructure. Derived from the client side job.py
5
6Copyright Martin J. Bligh, Andy Whitcroft 2007
7"""
8
jadmanski6bb32d72009-03-19 20:25:24 +00009import getpass, os, sys, re, stat, tempfile, time, select, subprocess
showardb18134f2009-03-20 20:52:18 +000010import traceback, shutil, warnings, fcntl, pickle, logging, logging.config
jadmanskic09fc152008-10-15 17:56:59 +000011from autotest_lib.client.bin import fd_stack, sysinfo
mbligh09108442008-10-15 16:27:38 +000012from autotest_lib.client.common_lib import error, log, utils, packages
jadmanski043e1132008-11-19 17:10:32 +000013from autotest_lib.server import test, subcommand, profilers
jadmanski10646442008-08-13 14:05:21 +000014from autotest_lib.tko import db as tko_db, status_lib, utils as tko_utils
jadmanski10646442008-08-13 14:05:21 +000015
16
mbligh084bc172008-10-18 14:02:45 +000017def _control_segment_path(name):
18 """Get the pathname of the named control segment file."""
jadmanski10646442008-08-13 14:05:21 +000019 server_dir = os.path.dirname(os.path.abspath(__file__))
mbligh084bc172008-10-18 14:02:45 +000020 return os.path.join(server_dir, "control_segments", name)
jadmanski10646442008-08-13 14:05:21 +000021
22
mbligh084bc172008-10-18 14:02:45 +000023CLIENT_CONTROL_FILENAME = 'control'
24SERVER_CONTROL_FILENAME = 'control.srv'
25MACHINES_FILENAME = '.machines'
jadmanski10646442008-08-13 14:05:21 +000026
mbligh084bc172008-10-18 14:02:45 +000027CLIENT_WRAPPER_CONTROL_FILE = _control_segment_path('client_wrapper')
28CRASHDUMPS_CONTROL_FILE = _control_segment_path('crashdumps')
29CRASHINFO_CONTROL_FILE = _control_segment_path('crashinfo')
mbligh084bc172008-10-18 14:02:45 +000030INSTALL_CONTROL_FILE = _control_segment_path('install')
showard45ae8192008-11-05 19:32:53 +000031CLEANUP_CONTROL_FILE = _control_segment_path('cleanup')
jadmanski10646442008-08-13 14:05:21 +000032
mbligh084bc172008-10-18 14:02:45 +000033VERIFY_CONTROL_FILE = _control_segment_path('verify')
mbligh084bc172008-10-18 14:02:45 +000034REPAIR_CONTROL_FILE = _control_segment_path('repair')
jadmanski10646442008-08-13 14:05:21 +000035
36
mbligh062ed152009-01-13 00:57:14 +000037# by default provide a stub that generates no site data
38def _get_site_job_data_dummy(job):
39 return {}
40
41
jadmanski10646442008-08-13 14:05:21 +000042# load up site-specific code for generating site-specific job data
mbligh062ed152009-01-13 00:57:14 +000043get_site_job_data = utils.import_site_function(__file__,
jadmanskic0a623d2009-03-03 21:11:48 +000044 "autotest_lib.server.site_server_job", "get_site_job_data",
mbligh062ed152009-01-13 00:57:14 +000045 _get_site_job_data_dummy)
jadmanski10646442008-08-13 14:05:21 +000046
47
class base_server_job(object):
    """
    The actual job against which we do everything.

    Properties:
            autodir
                    The top level autotest directory (/usr/local/autotest).
            serverdir
                    <autodir>/server/
            clientdir
                    <autodir>/client/
            conmuxdir
                    <autodir>/conmux/
            testdir
                    <autodir>/server/tests/
            site_testdir
                    <autodir>/server/site_tests/
            control
                    the control file for this job
            drop_caches_between_iterations
                    drop the pagecache between each iteration
    """

    # Version of the status log format this job emits; consumed by the
    # status parser (status_lib.parser) to pick the matching parsing logic.
    STATUS_VERSION = 1
72
73
    def __init__(self, control, args, resultdir, label, user, machines,
                 client=False, parse_job='',
                 ssh_user='root', ssh_port=22, ssh_pass=''):
        """
        Server side job object.

        Parameters:
                control:        The control file (pathname of)
                args:           args to pass to the control file
                resultdir:      where to throw the results
                label:          label for the job
                user:           Username for the job (email address)
                machines:       list of machine hostnames the job runs against
                client:         True if a client-side control file
                parse_job:      job tag for continuous result parsing, or ''
                                to disable parsing
                ssh_user:       ssh username used to reach the machines
                ssh_port:       ssh port used to reach the machines
                ssh_pass:       ssh password used to reach the machines
        """
        # derive the standard autotest directory layout relative to this file
        path = os.path.dirname(__file__)
        self.autodir = os.path.abspath(os.path.join(path, '..'))
        self.serverdir = os.path.join(self.autodir, 'server')
        self.testdir = os.path.join(self.serverdir, 'tests')
        self.site_testdir = os.path.join(self.serverdir, 'site_tests')
        self.tmpdir = os.path.join(self.serverdir, 'tmp')
        self.conmuxdir = os.path.join(self.autodir, 'conmux')
        self.clientdir = os.path.join(self.autodir, 'client')
        self.toolsdir = os.path.join(self.autodir, 'client/tools')
        if control:
            self.control = self._load_control_file(control)
        else:
            self.control = ''
        self.resultdir = resultdir
        # uncollected_log_file tracks client logs that still need collecting;
        # it only exists when we have a resultdir to put it in
        self.uncollected_log_file = None
        if resultdir:
            self.uncollected_log_file = os.path.join(resultdir, "uncollected_logs")
            self.debugdir = os.path.join(resultdir, 'debug')

            if not os.path.exists(resultdir):
                os.mkdir(resultdir)
            # seed the uncollected-logs file with an empty (pickled) list
            if not os.path.exists(self.uncollected_log_file):
                log_file = open(self.uncollected_log_file, "w")
                pickle.dump([], log_file)
                log_file.close()
            if not os.path.exists(self.debugdir):
                os.mkdir(self.debugdir)
            # start each job with a fresh status log
            status_log = self.get_status_log_path()
            if os.path.exists(status_log):
                os.remove(status_log)
        self.label = label
        self.user = user
        self.args = args
        self.machines = machines
        self.client = client
        self.record_prefix = ''
        self.warning_loggers = set()
        self.warning_manager = warning_manager()
        self.ssh_user = ssh_user
        self.ssh_port = ssh_port
        self.ssh_pass = ssh_pass
        self.run_test_cleanup = True
        self.last_boot_tag = None
        self.hosts = set()
        self.drop_caches_between_iterations = False

        # capture stdout/stderr so control files can redirect them
        self.stdout = fd_stack.fd_stack(1, sys.stdout)
        self.stderr = fd_stack.fd_stack(2, sys.stderr)

        if resultdir:
            self.sysinfo = sysinfo.sysinfo(self.resultdir)
        self.profilers = profilers.profilers(self)

        # make sure we have a writable tmpdir; fall back to a per-user dir
        # under the system temp directory if the default is unusable
        if not os.access(self.tmpdir, os.W_OK):
            try:
                os.makedirs(self.tmpdir, 0700)
            except os.error, e:
                # Thrown if the directory already exists, which it may.
                pass

        if not (os.access(self.tmpdir, os.W_OK) and os.path.isdir(self.tmpdir)):
            self.tmpdir = os.path.join(tempfile.gettempdir(),
                                       'autotest-' + getpass.getuser())
            try:
                os.makedirs(self.tmpdir, 0700)
            except os.error, e:
                # Thrown if the directory already exists, which it may.
                # If the problem was something other than the
                # directory already existing, this chmod should throw as well
                # exception.
                os.chmod(self.tmpdir, stat.S_IRWXU)

        # record the basic job metadata as keyvals in the results dir
        job_data = {'label' : label, 'user' : user,
                    'hostname' : ','.join(machines),
                    'status_version' : str(self.STATUS_VERSION),
                    'job_started' : str(int(time.time()))}
        if self.resultdir:
            job_data.update(get_site_job_data(self))
            utils.write_keyval(self.resultdir, job_data)

        # continuous parsing is only supported for single-machine jobs here;
        # multi-machine jobs set up per-machine parsers in parallel_simple()
        self.parse_job = parse_job
        if self.parse_job and len(machines) == 1:
            self.using_parser = True
            self.init_parser(resultdir)
        else:
            self.using_parser = False
        self.pkgmgr = packages.PackageManager(self.autodir,
                                             run_function_dargs={'timeout':600})
        self.pkgdir = os.path.join(self.autodir, 'packages')

        self.num_tests_run = 0
        self.num_tests_failed = 0

        self._register_subcommand_hooks()
182
183
jadmanskie432dd22009-01-30 15:04:51 +0000184 @staticmethod
185 def _load_control_file(path):
186 f = open(path)
187 try:
188 control_file = f.read()
189 finally:
190 f.close()
191 return re.sub('\r', '', control_file)
192
193
jadmanski550fdc22008-11-20 16:32:08 +0000194 def _register_subcommand_hooks(self):
mbligh2b92b862008-11-22 13:25:32 +0000195 """
196 Register some hooks into the subcommand modules that allow us
197 to properly clean up self.hosts created in forked subprocesses.
198 """
jadmanski550fdc22008-11-20 16:32:08 +0000199 def on_fork(cmd):
200 self._existing_hosts_on_fork = set(self.hosts)
201 def on_join(cmd):
202 new_hosts = self.hosts - self._existing_hosts_on_fork
203 for host in new_hosts:
204 host.close()
205 subcommand.subcommand.register_fork_hook(on_fork)
206 subcommand.subcommand.register_join_hook(on_join)
207
jadmanski10646442008-08-13 14:05:21 +0000208
    def init_parser(self, resultdir):
        """
        Start the continuous parsing of resultdir. This sets up
        the database connection and inserts the basic job object into
        the database if necessary.
        """
        # redirect parser debugging to .parse.log (unbuffered)
        parse_log = os.path.join(resultdir, '.parse.log')
        parse_log = open(parse_log, 'w', 0)
        tko_utils.redirect_parser_debugging(parse_log)
        # create a job model object and set up the db
        self.results_db = tko_db.db(autocommit=True)
        self.parser = status_lib.parser(self.STATUS_VERSION)
        self.job_model = self.parser.make_job(resultdir)
        self.parser.start(self.job_model)
        # check if a job already exists in the db and insert it if
        # it does not
        job_idx = self.results_db.find_job(self.parse_job)
        if job_idx is None:
            self.results_db.insert_job(self.parse_job, self.job_model)
        else:
            # job already present: point the model at the existing rows
            machine_idx = self.results_db.lookup_machine(self.job_model.machine)
            self.job_model.index = job_idx
            self.job_model.machine_idx = machine_idx
233
234
    def cleanup_parser(self):
        """
        This should be called after the server job is finished
        to carry out any remaining cleanup (e.g. flushing any
        remaining test results to the results db). No-op if continuous
        parsing was never enabled.
        """
        if not self.using_parser:
            return
        # flush any tests the parser is still holding into the db
        final_tests = self.parser.end()
        for test in final_tests:
            self.__insert_test(test)
        self.using_parser = False
247
248
    def verify(self):
        """Run the verify control segment against all of self.machines.

        Records an ABORT in the status log and re-raises on any failure.
        Raises AutoservError if no machines were specified.
        """
        if not self.machines:
            raise error.AutoservError('No machines specified to verify')
        if self.resultdir:
            os.chdir(self.resultdir)
        try:
            namespace = {'machines' : self.machines, 'job' : self,
                         'ssh_user' : self.ssh_user,
                         'ssh_port' : self.ssh_port,
                         'ssh_pass' : self.ssh_pass}
            self._execute_code(VERIFY_CONTROL_FILE, namespace, protect=False)
        except Exception, e:
            msg = ('Verify failed\n' + str(e) + '\n' + traceback.format_exc())
            self.record('ABORT', None, None, msg)
            raise
264
265
    def repair(self, host_protection):
        """Run the repair control segment against self.machines, then
        reverify them.

        @param host_protection - protection level passed through to the
            repair control segment as 'protection_level'.
        """
        if not self.machines:
            raise error.AutoservError('No machines specified to repair')
        if self.resultdir:
            os.chdir(self.resultdir)
        namespace = {'machines': self.machines, 'job': self,
                     'ssh_user': self.ssh_user, 'ssh_port': self.ssh_port,
                     'ssh_pass': self.ssh_pass,
                     'protection_level': host_protection}
        # no matter what happens during repair (except if it succeeded in
        # initiating hardware repair procedure), go on to try to reverify
        try:
            self._execute_code(REPAIR_CONTROL_FILE, namespace, protect=False)
        except error.AutoservHardwareRepairRequestedError:
            raise
        except Exception, exc:
            print 'Exception occured during repair'
            traceback.print_exc()

        self.verify()
286
287
    def precheck(self):
        """
        perform any additional checks in derived classes.
        Base implementation is intentionally a no-op.
        """
        pass
293
294
    def enable_external_logging(self):
        """
        Start or restart external logging mechanism.
        Hook for derived classes; base implementation does nothing.
        """
        pass
300
301
    def disable_external_logging(self):
        """
        Pause or stop external logging mechanism.
        Hook for derived classes; base implementation does nothing.
        """
        pass
307
308
jadmanski23afbec2008-09-17 18:12:07 +0000309 def enable_test_cleanup(self):
mbligh2b92b862008-11-22 13:25:32 +0000310 """
311 By default tests run test.cleanup
312 """
jadmanski23afbec2008-09-17 18:12:07 +0000313 self.run_test_cleanup = True
314
315
    def disable_test_cleanup(self):
        """
        Make tests skip their test.cleanup phase.
        """
        self.run_test_cleanup = False
321
322
jadmanski10646442008-08-13 14:05:21 +0000323 def use_external_logging(self):
mbligh2b92b862008-11-22 13:25:32 +0000324 """
325 Return True if external logging should be used.
jadmanski10646442008-08-13 14:05:21 +0000326 """
327 return False
328
329
    def parallel_simple(self, function, machines, log=True, timeout=None):
        """
        Run 'function' using parallel_simple, with an extra wrapper to handle
        the necessary setup for continuous parsing, if possible. If continuous
        parsing is already properly initialized then this should just work.

        @param function - callable taking a single machine hostname
        @param machines - list of machine hostnames to run against
        @param log - if True, set up per-machine result dirs/keyvals
        @param timeout - passed through to subcommand.parallel_simple
        """
        # forking happens unless this is a single-machine job run against
        # exactly the machine it was created for
        is_forking = not (len(machines) == 1 and self.machines == machines)
        if self.parse_job and is_forking and log:
            # parsing enabled: each child narrows the job state down to its
            # one machine and runs its own parser
            def wrapper(machine):
                self.parse_job += "/" + machine
                self.using_parser = True
                self.machines = [machine]
                self.resultdir = os.path.join(self.resultdir, machine)
                os.chdir(self.resultdir)
                utils.write_keyval(self.resultdir, {"hostname": machine})
                self.init_parser(self.resultdir)
                result = function(machine)
                self.cleanup_parser()
                return result
        elif len(machines) > 1 and log:
            # no parsing, but still give each machine its own result dir
            def wrapper(machine):
                self.resultdir = os.path.join(self.resultdir, machine)
                os.chdir(self.resultdir)
                machine_data = {'hostname' : machine,
                                'status_version' : str(self.STATUS_VERSION)}
                utils.write_keyval(self.resultdir, machine_data)
                result = function(machine)
                return result
        else:
            wrapper = function
        subcommand.parallel_simple(wrapper, machines, log, timeout)
361
362
jadmanskie432dd22009-01-30 15:04:51 +0000363 USE_TEMP_DIR = object()
mbligh2b92b862008-11-22 13:25:32 +0000364 def run(self, cleanup=False, install_before=False, install_after=False,
jadmanskie432dd22009-01-30 15:04:51 +0000365 collect_crashdumps=True, namespace={}, control=None,
jadmanskidef0c3c2009-03-25 20:07:10 +0000366 control_file_dir=None, only_collect_crashinfo=False):
jadmanski10646442008-08-13 14:05:21 +0000367 # use a copy so changes don't affect the original dictionary
368 namespace = namespace.copy()
369 machines = self.machines
jadmanskie432dd22009-01-30 15:04:51 +0000370 if control is None:
371 control = self.control
372 if control_file_dir is None:
373 control_file_dir = self.resultdir
jadmanski10646442008-08-13 14:05:21 +0000374
375 self.aborted = False
376 namespace['machines'] = machines
377 namespace['args'] = self.args
378 namespace['job'] = self
379 namespace['ssh_user'] = self.ssh_user
380 namespace['ssh_port'] = self.ssh_port
381 namespace['ssh_pass'] = self.ssh_pass
382 test_start_time = int(time.time())
383
mbligh80e1eba2008-11-19 00:26:18 +0000384 if self.resultdir:
385 os.chdir(self.resultdir)
jadmanski779bd292009-03-19 17:33:33 +0000386 # touch status.log so that the parser knows a job is running here
387 open(self.get_status_log_path(), 'w').close()
mbligh80e1eba2008-11-19 00:26:18 +0000388 self.enable_external_logging()
jadmanskie432dd22009-01-30 15:04:51 +0000389
jadmanskicdd0c402008-09-19 21:21:31 +0000390 collect_crashinfo = True
mblighaebe3b62008-12-22 14:45:40 +0000391 temp_control_file_dir = None
jadmanski10646442008-08-13 14:05:21 +0000392 try:
393 if install_before and machines:
mbligh084bc172008-10-18 14:02:45 +0000394 self._execute_code(INSTALL_CONTROL_FILE, namespace)
jadmanskie432dd22009-01-30 15:04:51 +0000395
jadmanskidef0c3c2009-03-25 20:07:10 +0000396 if not only_collect_crashinfo:
397 # determine the dir to write the control files to
398 cfd_specified = (control_file_dir
399 and control_file_dir is not self.USE_TEMP_DIR)
400 if cfd_specified:
401 temp_control_file_dir = None
402 else:
403 temp_control_file_dir = tempfile.mkdtemp(
404 suffix='temp_control_file_dir')
405 control_file_dir = temp_control_file_dir
406 server_control_file = os.path.join(control_file_dir,
407 SERVER_CONTROL_FILENAME)
408 client_control_file = os.path.join(control_file_dir,
409 CLIENT_CONTROL_FILENAME)
410 if self.client:
411 namespace['control'] = control
412 utils.open_write_close(client_control_file, control)
413 shutil.copy(CLIENT_WRAPPER_CONTROL_FILE,
414 server_control_file)
415 else:
416 utils.open_write_close(server_control_file, control)
417 self._execute_code(server_control_file, namespace)
jadmanski10646442008-08-13 14:05:21 +0000418
jadmanskidef0c3c2009-03-25 20:07:10 +0000419 # no error occured, so we don't need to collect crashinfo
420 collect_crashinfo = False
jadmanski10646442008-08-13 14:05:21 +0000421 finally:
mblighaebe3b62008-12-22 14:45:40 +0000422 if temp_control_file_dir:
jadmanskie432dd22009-01-30 15:04:51 +0000423 # Clean up temp directory used for copies of the control files
mblighaebe3b62008-12-22 14:45:40 +0000424 try:
425 shutil.rmtree(temp_control_file_dir)
426 except Exception, e:
jadmanskie432dd22009-01-30 15:04:51 +0000427 print 'Error %s removing dir %s' % (e,
428 temp_control_file_dir)
429
jadmanskicdd0c402008-09-19 21:21:31 +0000430 if machines and (collect_crashdumps or collect_crashinfo):
jadmanski10646442008-08-13 14:05:21 +0000431 namespace['test_start_time'] = test_start_time
jadmanskicdd0c402008-09-19 21:21:31 +0000432 if collect_crashinfo:
mbligh084bc172008-10-18 14:02:45 +0000433 # includes crashdumps
434 self._execute_code(CRASHINFO_CONTROL_FILE, namespace)
jadmanskicdd0c402008-09-19 21:21:31 +0000435 else:
mbligh084bc172008-10-18 14:02:45 +0000436 self._execute_code(CRASHDUMPS_CONTROL_FILE, namespace)
mbligha788dc42009-03-26 21:10:16 +0000437 if self.uncollected_log_file:
438 os.remove(self.uncollected_log_file)
jadmanski10646442008-08-13 14:05:21 +0000439 self.disable_external_logging()
showard45ae8192008-11-05 19:32:53 +0000440 if cleanup and machines:
441 self._execute_code(CLEANUP_CONTROL_FILE, namespace)
jadmanski10646442008-08-13 14:05:21 +0000442 if install_after and machines:
mbligh084bc172008-10-18 14:02:45 +0000443 self._execute_code(INSTALL_CONTROL_FILE, namespace)
jadmanski10646442008-08-13 14:05:21 +0000444
445
    def run_test(self, url, *args, **dargs):
        """
        Summon a test object and run it.

        tag
                tag to add to testname
        url
                url of the test to run

        Returns True if the test ran cleanly, False if it raised a
        TestBaseException; any other exception is re-raised.
        """

        (group, testname) = self.pkgmgr.get_package_name(url, 'test')

        tag = dargs.pop('tag', None)
        if tag:
            testname += '.' + str(tag)
        subdir = testname

        # refuse to clobber an existing result directory for this test/tag
        outputdir = os.path.join(self.resultdir, subdir)
        if os.path.exists(outputdir):
            msg = ("%s already exists, test <%s> may have"
                   " already run with tag <%s>" % (outputdir, testname, tag))
            raise error.TestError(msg)
        os.mkdir(outputdir)

        def group_func():
            try:
                test.runtest(self, url, tag, args, dargs)
            except error.TestBaseException, e:
                self.record(e.exit_status, subdir, testname, str(e))
                raise
            except Exception, e:
                info = str(e) + "\n" + traceback.format_exc()
                self.record('FAIL', subdir, testname, info)
                raise
            else:
                self.record('GOOD', subdir, testname, 'completed successfully')

        result, exc_info = self._run_group(testname, subdir, group_func)
        if exc_info and isinstance(exc_info[1], error.TestBaseException):
            # test failures are reported via the return value, not raised
            return False
        elif exc_info:
            raise exc_info[0], exc_info[1], exc_info[2]
        else:
            return True
jadmanski10646442008-08-13 14:05:21 +0000490
491
    def _run_group(self, name, subdir, function, *args, **dargs):
        """\
        Underlying method for running something inside of a group.

        Emits START/END records around the call and indents nested records.
        Returns (result, exc_info) where exc_info is non-None only for
        TestBaseException failures; other exceptions become a JobError.
        """
        result, exc_info = None, None
        old_record_prefix = self.record_prefix
        try:
            self.record('START', subdir, name)
            self.record_prefix += '\t'
            try:
                result = function(*args, **dargs)
            finally:
                # always restore the indent, even if function() raised
                self.record_prefix = old_record_prefix
        except error.TestBaseException, e:
            self.record("END %s" % e.exit_status, subdir, name)
            exc_info = sys.exc_info()
        except Exception, e:
            err_msg = str(e) + '\n'
            err_msg += traceback.format_exc()
            self.record('END ABORT', subdir, name, err_msg)
            raise error.JobError(name + ' failed\n' + traceback.format_exc())
        else:
            self.record('END GOOD', subdir, name)

        return result, exc_info
jadmanski10646442008-08-13 14:05:21 +0000517
518
519 def run_group(self, function, *args, **dargs):
520 """\
521 function:
522 subroutine to run
523 *args:
524 arguments for the function
525 """
526
527 name = function.__name__
528
529 # Allow the tag for the group to be specified.
530 tag = dargs.pop('tag', None)
531 if tag:
532 name = tag
533
jadmanskide292df2008-08-26 20:51:14 +0000534 return self._run_group(name, None, function, *args, **dargs)[0]
jadmanski10646442008-08-13 14:05:21 +0000535
536
    def run_reboot(self, reboot_func, get_kernel_func):
        """\
        A specialization of run_group meant specifically for handling
        a reboot. Includes support for capturing the kernel version
        after the reboot.

        reboot_func: a function that carries out the reboot

        get_kernel_func: a function that returns a string
        representing the kernel version.
        """

        old_record_prefix = self.record_prefix
        try:
            self.record('START', None, 'reboot')
            self.record_prefix += '\t'
            reboot_func()
        except Exception, e:
            # reboot failed: log it and restore the record indent
            self.record_prefix = old_record_prefix
            err_msg = str(e) + '\n' + traceback.format_exc()
            self.record('END FAIL', None, 'reboot', err_msg)
        else:
            # reboot succeeded: capture the (possibly new) kernel version
            kernel = get_kernel_func()
            self.record_prefix = old_record_prefix
            self.record('END GOOD', None, 'reboot',
                        optional_fields={"kernel": kernel})
563
564
jadmanskie432dd22009-01-30 15:04:51 +0000565 def run_control(self, path):
566 """Execute a control file found at path (relative to the autotest
567 path). Intended for executing a control file within a control file,
568 not for running the top-level job control file."""
569 path = os.path.join(self.autodir, path)
570 control_file = self._load_control_file(path)
571 self.run(control=control_file, control_file_dir=self.USE_TEMP_DIR)
572
573
jadmanskic09fc152008-10-15 17:56:59 +0000574 def add_sysinfo_command(self, command, logfile=None, on_every_test=False):
mbligh4395bbd2009-03-25 19:34:17 +0000575 self._add_sysinfo_loggable(sysinfo.command(command, logf=logfile),
jadmanskic09fc152008-10-15 17:56:59 +0000576 on_every_test)
577
578
    def add_sysinfo_logfile(self, file, on_every_test=False):
        """Register a log file to be collected as sysinfo, either per-boot
        (default) or before/after every test."""
        self._add_sysinfo_loggable(sysinfo.logfile(file), on_every_test)
581
582
583 def _add_sysinfo_loggable(self, loggable, on_every_test):
584 if on_every_test:
585 self.sysinfo.test_loggables.add(loggable)
586 else:
587 self.sysinfo.boot_loggables.add(loggable)
588
589
jadmanski10646442008-08-13 14:05:21 +0000590 def record(self, status_code, subdir, operation, status='',
591 optional_fields=None):
592 """
593 Record job-level status
594
595 The intent is to make this file both machine parseable and
596 human readable. That involves a little more complexity, but
597 really isn't all that bad ;-)
598
599 Format is <status code>\t<subdir>\t<operation>\t<status>
600
mbligh1b3b3762008-09-25 02:46:34 +0000601 status code: see common_lib.log.is_valid_status()
jadmanski10646442008-08-13 14:05:21 +0000602 for valid status definition
603
604 subdir: MUST be a relevant subdirectory in the results,
605 or None, which will be represented as '----'
606
607 operation: description of what you ran (e.g. "dbench", or
608 "mkfs -t foobar /dev/sda9")
609
610 status: error message or "completed sucessfully"
611
612 ------------------------------------------------------------
613
614 Initial tabs indicate indent levels for grouping, and is
615 governed by self.record_prefix
616
617 multiline messages have secondary lines prefaced by a double
618 space (' ')
619
620 Executing this method will trigger the logging of all new
621 warnings to date from the various console loggers.
622 """
623 # poll all our warning loggers for new warnings
624 warnings = self._read_warnings()
625 for timestamp, msg in warnings:
626 self._record("WARN", None, None, msg, timestamp)
627
628 # write out the actual status log line
629 self._record(status_code, subdir, operation, status,
630 optional_fields=optional_fields)
631
632
    def _read_warnings(self):
        """Poll all the warning loggers and extract any new warnings that have
        been logged. If the warnings belong to a category that is currently
        disabled, this method will discard them and they will no longer be
        retrievable.

        Returns a list of (timestamp, message) tuples, where timestamp is an
        integer epoch timestamp."""
        warnings = []
        while True:
            # pull in a line of output from every logger that has
            # output ready to be read
            loggers, _, _ = select.select(self.warning_loggers, [], [], 0)
            closed_loggers = set()
            for logger in loggers:
                line = logger.readline()
                # record any broken pipes (aka line == empty)
                if len(line) == 0:
                    closed_loggers.add(logger)
                    continue
                # parse out the warning; lines are tab-separated:
                # <epoch timestamp>\t<warning type>\t<message>
                timestamp, msgtype, msg = line.split('\t', 2)
                timestamp = int(timestamp)
                # if the warning is valid, add it to the results
                if self.warning_manager.is_valid(timestamp, msgtype):
                    warnings.append((timestamp, msg.strip()))

            # stop listening to loggers that are closed
            self.warning_loggers -= closed_loggers

            # stop if none of the loggers have any output left
            if not loggers:
                break

        # sort into timestamp order
        warnings.sort()
        return warnings
670
671
jadmanskif37df842009-02-11 00:03:26 +0000672 def disable_warnings(self, warning_type, record=True):
673 self.warning_manager.disable_warnings(warning_type)
674 if record:
675 self.record("INFO", None, None,
676 "disabling %s warnings" % warning_type,
677 {"warnings.disable": warning_type})
678
679
680 def enable_warnings(self, warning_type, record=True):
681 self.warning_manager.enable_warnings(warning_type)
682 if record:
683 self.record("INFO", None, None,
684 "enabling %s warnings" % warning_type,
685 {"warnings.enable": warning_type})
686
687
jadmanski779bd292009-03-19 17:33:33 +0000688 def get_status_log_path(self, subdir=None):
689 """Return the path to the job status log.
690
691 @param subdir - Optional paramter indicating that you want the path
692 to a subdirectory status log.
693
694 @returns The path where the status log should be.
695 """
696 if subdir:
697 return os.path.join(self.resultdir, subdir, "status.log")
698 else:
699 return os.path.join(self.resultdir, "status.log")
700
701
jadmanski6bb32d72009-03-19 20:25:24 +0000702 def _update_uncollected_logs_list(self, update_func):
703 """Updates the uncollected logs list in a multi-process safe manner.
704
705 @param update_func - a function that updates the list of uncollected
706 logs. Should take one parameter, the list to be updated.
707 """
mbligha788dc42009-03-26 21:10:16 +0000708 if self.uncollected_log_file:
709 log_file = open(self.uncollected_log_file, "r+")
710 fcntl.flock(log_file, fcntl.LOCK_EX)
jadmanski6bb32d72009-03-19 20:25:24 +0000711 try:
712 uncollected_logs = pickle.load(log_file)
713 update_func(uncollected_logs)
714 log_file.seek(0)
715 log_file.truncate()
716 pickle.dump(uncollected_logs, log_file)
717 finally:
718 fcntl.flock(log_file, fcntl.LOCK_UN)
719 log_file.close()
720
721
722 def add_client_log(self, hostname, remote_path, local_path):
723 """Adds a new set of client logs to the list of uncollected logs,
724 to allow for future log recovery.
725
726 @param host - the hostname of the machine holding the logs
727 @param remote_path - the directory on the remote machine holding logs
728 @param local_path - the local directory to copy the logs into
729 """
730 def update_func(logs_list):
731 logs_list.append((hostname, remote_path, local_path))
732 self._update_uncollected_logs_list(update_func)
733
734
735 def remove_client_log(self, hostname, remote_path, local_path):
736 """Removes a set of client logs from the list of uncollected logs,
737 to allow for future log recovery.
738
739 @param host - the hostname of the machine holding the logs
740 @param remote_path - the directory on the remote machine holding logs
741 @param local_path - the local directory to copy the logs into
742 """
743 def update_func(logs_list):
744 logs_list.remove((hostname, remote_path, local_path))
745 self._update_uncollected_logs_list(update_func)
746
747
jadmanski10646442008-08-13 14:05:21 +0000748 def _render_record(self, status_code, subdir, operation, status='',
749 epoch_time=None, record_prefix=None,
750 optional_fields=None):
751 """
752 Internal Function to generate a record to be written into a
753 status log. For use by server_job.* classes only.
754 """
755 if subdir:
756 if re.match(r'[\n\t]', subdir):
mbligh2b92b862008-11-22 13:25:32 +0000757 raise ValueError('Invalid character in subdir string')
jadmanski10646442008-08-13 14:05:21 +0000758 substr = subdir
759 else:
760 substr = '----'
761
mbligh1b3b3762008-09-25 02:46:34 +0000762 if not log.is_valid_status(status_code):
mbligh2b92b862008-11-22 13:25:32 +0000763 raise ValueError('Invalid status code supplied: %s' % status_code)
jadmanski10646442008-08-13 14:05:21 +0000764 if not operation:
765 operation = '----'
766 if re.match(r'[\n\t]', operation):
mbligh2b92b862008-11-22 13:25:32 +0000767 raise ValueError('Invalid character in operation string')
jadmanski10646442008-08-13 14:05:21 +0000768 operation = operation.rstrip()
769 status = status.rstrip()
770 status = re.sub(r"\t", " ", status)
771 # Ensure any continuation lines are marked so we can
772 # detect them in the status file to ensure it is parsable.
773 status = re.sub(r"\n", "\n" + self.record_prefix + " ", status)
774
775 if not optional_fields:
776 optional_fields = {}
777
778 # Generate timestamps for inclusion in the logs
779 if epoch_time is None:
780 epoch_time = int(time.time())
781 local_time = time.localtime(epoch_time)
782 optional_fields["timestamp"] = str(epoch_time)
783 optional_fields["localtime"] = time.strftime("%b %d %H:%M:%S",
784 local_time)
785
786 fields = [status_code, substr, operation]
787 fields += ["%s=%s" % x for x in optional_fields.iteritems()]
788 fields.append(status)
789
790 if record_prefix is None:
791 record_prefix = self.record_prefix
792
793 msg = '\t'.join(str(x) for x in fields)
jadmanski10646442008-08-13 14:05:21 +0000794 return record_prefix + msg + '\n'
795
796
797 def _record_prerendered(self, msg):
798 """
799 Record a pre-rendered msg into the status logs. The only
800 change this makes to the message is to add on the local
801 indentation. Should not be called outside of server_job.*
802 classes. Unlike _record, this does not write the message
803 to standard output.
804 """
805 lines = []
jadmanski779bd292009-03-19 17:33:33 +0000806 status_file = self.get_status_log_path()
jadmanski10646442008-08-13 14:05:21 +0000807 status_log = open(status_file, 'a')
808 for line in msg.splitlines():
809 line = self.record_prefix + line + '\n'
810 lines.append(line)
811 status_log.write(line)
812 status_log.close()
813 self.__parse_status(lines)
814
815
    def _fill_server_control_namespace(self, namespace, protect=True):
        """
        Prepare a namespace to be used when executing server control files.

        This sets up the control file API by importing modules and making them
        available under the appropriate names within namespace.

        For use by _execute_code().

        Args:
          namespace: The namespace dictionary to fill in.
          protect: Boolean.  If True (the default) any operation that would
              clobber an existing entry in namespace will cause an error.
        Raises:
          error.AutoservError: When a name would be clobbered by import.
        """
        def _import_names(module_name, names=()):
            """
            Import a module and assign named attributes into namespace.

            Args:
                module_name: The string module name.
                names: A limiting list of names to import from module_name.  If
                    empty (the default), all names are imported from the module
                    similar to a "from foo.bar import *" statement.
            Raises:
                error.AutoservError: When a name being imported would clobber
                    a name already in namespace.
            """
            # The fromlist argument (names) makes __import__ return the
            # right-most module of a dotted name when it is non-empty.
            module = __import__(module_name, {}, {}, names)

            # No names supplied?  Import * from the lowest level module.
            # (Ugh, why do I have to implement this part myself?)
            if not names:
                # With an empty fromlist __import__ returned the top-level
                # package, so walk down the dotted path manually.
                for submodule_name in module_name.split('.')[1:]:
                    module = getattr(module, submodule_name)
                if hasattr(module, '__all__'):
                    names = getattr(module, '__all__')
                else:
                    names = dir(module)

            # Install each name into namespace, checking to make sure it
            # doesn't override anything that already exists.
            for name in names:
                # Check for conflicts to help prevent future problems.
                if name in namespace and protect:
                    if namespace[name] is not getattr(module, name):
                        raise error.AutoservError('importing name '
                                '%s from %s %r would override %r' %
                                (name, module_name, getattr(module, name),
                                 namespace[name]))
                    else:
                        # Encourage cleanliness and the use of __all__ for a
                        # more concrete API with less surprises on '*' imports.
                        warnings.warn('%s (%r) being imported from %s for use '
                                      'in server control files is not the '
                                      'first occurrance of that import.' %
                                      (name, namespace[name], module_name))

                namespace[name] = getattr(module, name)


        # This is the equivalent of prepending a bunch of import statements to
        # the front of the control script.
        namespace.update(os=os, sys=sys)
        _import_names('autotest_lib.server',
                ('hosts', 'autotest', 'kvm', 'git', 'standalone_profiler',
                 'source_kernel', 'rpm_kernel', 'deb_kernel', 'git_kernel'))
        _import_names('autotest_lib.server.subcommand',
                      ('parallel', 'parallel_simple', 'subcommand'))
        _import_names('autotest_lib.server.utils',
                      ('run', 'get_tmp_dir', 'sh_escape', 'parse_machine'))
        _import_names('autotest_lib.client.common_lib.error')
        _import_names('autotest_lib.client.common_lib.barrier', ('barrier',))

        # Inject ourself as the job object into other classes within the API.
        # (Yuck, this injection is a gross thing be part of a public API. -gps)
        #
        # XXX Base & SiteAutotest do not appear to use .job.  Who does?
        namespace['autotest'].Autotest.job = self
        # server.hosts.base_classes.Host uses .job.
        namespace['hosts'].Host.job = self
898
899
900 def _execute_code(self, code_file, namespace, protect=True):
mbligh2b92b862008-11-22 13:25:32 +0000901 """
902 Execute code using a copy of namespace as a server control script.
mbligh084bc172008-10-18 14:02:45 +0000903
904 Unless protect_namespace is explicitly set to False, the dict will not
905 be modified.
906
907 Args:
908 code_file: The filename of the control file to execute.
909 namespace: A dict containing names to make available during execution.
910 protect: Boolean. If True (the default) a copy of the namespace dict
911 is used during execution to prevent the code from modifying its
912 contents outside of this function. If False the raw dict is
913 passed in and modifications will be allowed.
914 """
915 if protect:
916 namespace = namespace.copy()
917 self._fill_server_control_namespace(namespace, protect=protect)
918 # TODO: Simplify and get rid of the special cases for only 1 machine.
showard3e66e8c2008-10-27 19:20:51 +0000919 if len(self.machines) > 1:
mbligh084bc172008-10-18 14:02:45 +0000920 machines_text = '\n'.join(self.machines) + '\n'
921 # Only rewrite the file if it does not match our machine list.
922 try:
923 machines_f = open(MACHINES_FILENAME, 'r')
924 existing_machines_text = machines_f.read()
925 machines_f.close()
926 except EnvironmentError:
927 existing_machines_text = None
928 if machines_text != existing_machines_text:
929 utils.open_write_close(MACHINES_FILENAME, machines_text)
930 execfile(code_file, namespace, namespace)
jadmanski10646442008-08-13 14:05:21 +0000931
932
933 def _record(self, status_code, subdir, operation, status='',
934 epoch_time=None, optional_fields=None):
935 """
936 Actual function for recording a single line into the status
937 logs. Should never be called directly, only by job.record as
938 this would bypass the console monitor logging.
939 """
940
mbligh2b92b862008-11-22 13:25:32 +0000941 msg = self._render_record(status_code, subdir, operation, status,
942 epoch_time, optional_fields=optional_fields)
jadmanski10646442008-08-13 14:05:21 +0000943
jadmanski779bd292009-03-19 17:33:33 +0000944 status_file = self.get_status_log_path()
jadmanski10646442008-08-13 14:05:21 +0000945 sys.stdout.write(msg)
946 open(status_file, "a").write(msg)
947 if subdir:
jadmanski779bd292009-03-19 17:33:33 +0000948 sub_status_file = self.get_status_log_path(subdir)
949 open(sub_status_file, "a").write(msg)
jadmanski10646442008-08-13 14:05:21 +0000950 self.__parse_status(msg.splitlines())
951
952
953 def __parse_status(self, new_lines):
954 if not self.using_parser:
955 return
956 new_tests = self.parser.process_lines(new_lines)
957 for test in new_tests:
958 self.__insert_test(test)
959
960
961 def __insert_test(self, test):
mbligh2b92b862008-11-22 13:25:32 +0000962 """
963 An internal method to insert a new test result into the
jadmanski10646442008-08-13 14:05:21 +0000964 database. This method will not raise an exception, even if an
965 error occurs during the insert, to avoid failing a test
966 simply because of unexpected database issues."""
showard21baa452008-10-21 00:08:39 +0000967 self.num_tests_run += 1
968 if status_lib.is_worse_than_or_equal_to(test.status, 'FAIL'):
969 self.num_tests_failed += 1
jadmanski10646442008-08-13 14:05:21 +0000970 try:
971 self.results_db.insert_test(self.job_model, test)
972 except Exception:
973 msg = ("WARNING: An unexpected error occured while "
974 "inserting test results into the database. "
975 "Ignoring error.\n" + traceback.format_exc())
976 print >> sys.stderr, msg
977
mblighcaa62c22008-04-07 21:51:17 +0000978
# Pick up a site-specific server_job subclass when one is available;
# falls back to base_server_job if no site_server_job module exists.
site_server_job = utils.import_site_class(
    __file__, "autotest_lib.server.site_server_job", "site_server_job",
    base_server_job)
jadmanski0afbb632008-06-06 21:10:57 +0000982
class server_job(site_server_job, base_server_job):
    """Concrete server job class: layers any site-specific customizations
    (site_server_job) on top of the stock base_server_job implementation."""
    pass
jadmanskif37df842009-02-11 00:03:26 +0000985
986
987class warning_manager(object):
988 """Class for controlling warning logs. Manages the enabling and disabling
989 of warnings."""
990 def __init__(self):
991 # a map of warning types to a list of disabled time intervals
992 self.disabled_warnings = {}
993
994
995 def is_valid(self, timestamp, warning_type):
996 """Indicates if a warning (based on the time it occured and its type)
997 is a valid warning. A warning is considered "invalid" if this type of
998 warning was marked as "disabled" at the time the warning occured."""
999 disabled_intervals = self.disabled_warnings.get(warning_type, [])
1000 for start, end in disabled_intervals:
1001 if timestamp >= start and (end is None or timestamp < end):
1002 return False
1003 return True
1004
1005
1006 def disable_warnings(self, warning_type, current_time_func=time.time):
1007 """As of now, disables all further warnings of this type."""
1008 intervals = self.disabled_warnings.setdefault(warning_type, [])
1009 if not intervals or intervals[-1][1] is not None:
1010 intervals.append((current_time_func(), None))
1011
1012
1013 def enable_warnings(self, warning_type, current_time_func=time.time):
1014 """As of now, enables all further warnings of this type."""
1015 intervals = self.disabled_warnings.get(warning_type, [])
1016 if intervals and intervals[-1][1] is None:
1017 intervals[-1] = (intervals[-1][0], current_time_func())