blob: d146848d4c0dd98d37bd2726780c8296e49c5464 [file] [log] [blame]
"""The main job wrapper

This is the core infrastructure.  It defines the client-side job object
(see base_job below) and the step-engine plumbing that lets a job be
suspended, rebooted and resumed from pickled state.
"""

__author__ = """Copyright Andy Whitcroft, Martin J. Bligh 2006"""
mbligha2508052006-05-28 21:29:53 +00007
mbligh8f243ec2006-10-10 05:55:49 +00008# standard stuff
mbligh366ff1b2008-04-25 16:07:56 +00009import os, sys, re, pickle, shutil, time, traceback, types, copy
mbligh302482e2008-05-01 20:06:16 +000010
mbligh8f243ec2006-10-10 05:55:49 +000011# autotest stuff
mblighc61fb362008-06-05 16:22:15 +000012from autotest_lib.client.bin import autotest_utils, parallel, kernel, xen
13from autotest_lib.client.bin import profilers, fd_stack, boottool, harness
14from autotest_lib.client.bin import config, sysinfo, cpuset, test, filesystem
mblighe829ba52008-06-03 15:04:08 +000015from autotest_lib.client.common_lib import error, barrier, logging, utils
mbligh302482e2008-05-01 20:06:16 +000016
# Code executed in the control file's namespace before the control file
# itself (see step_engine), so control files can use the error classes
# and autotest_utils helpers without importing them explicitly.
JOB_PREAMBLE = """
from autotest_lib.client.common_lib.error import *
from autotest_lib.client.bin.autotest_utils import *
"""
21
class StepError(error.AutotestError):
    """Raised by the step engine for malformed steps (e.g. a next_step
    target that is neither a function nor a function name)."""
    pass
mbligh12a04cb2008-04-25 16:07:20 +000024
25
class base_job(object):
    """The actual job against which we do everything.

    Properties:
            autodir
                    The top level autotest directory (/usr/local/autotest).
                    Comes from os.environ['AUTODIR'].
            bindir
                    <autodir>/bin/
            libdir
                    <autodir>/lib/
            testdir
                    <autodir>/tests/
            site_testdir
                    <autodir>/site_tests/
            profdir
                    <autodir>/profilers/
            tmpdir
                    <autodir>/tmp/
            resultdir
                    <autodir>/results/<jobtag>
            stdout
                    fd_stack object for stdout
            stderr
                    fd_stack object for stderr
            profilers
                    the profilers object for this job
            harness
                    the server harness object for this job
            config
                    the job configuration for this job
    """

    # Default name of the status log within resultdir; parallel()
    # temporarily swaps in per-task names ("status.0", "status.1", ...).
    DEFAULT_LOG_FILENAME = "status"
mblighd528d302007-12-19 16:19:05 +000060
    def __init__(self, control, jobtag, cont, harness_type=None,
                 use_external_logging = False):
        """
        control
                The control file (pathname of)
        jobtag
                The job tag string (eg "default")
        cont
                If this is the continuation of this job
        harness_type
                An alternative server harness
        use_external_logging
                If true, invoke the enable_external_logging() hook
                after setup (a no-op in this base class)
        """
        # Directory layout is derived entirely from $AUTODIR.
        self.autodir = os.environ['AUTODIR']
        self.bindir = os.path.join(self.autodir, 'bin')
        self.libdir = os.path.join(self.autodir, 'lib')
        self.testdir = os.path.join(self.autodir, 'tests')
        self.site_testdir = os.path.join(self.autodir, 'site_tests')
        self.profdir = os.path.join(self.autodir, 'profilers')
        self.tmpdir = os.path.join(self.autodir, 'tmp')
        self.resultdir = os.path.join(self.autodir, 'results', jobtag)
        self.sysinfodir = os.path.join(self.resultdir, 'sysinfo')
        self.control = os.path.abspath(control)
        # Persistent state lives next to the control file; it is what
        # lets a continued (cont=True) job pick up where it left off.
        self.state_file = self.control + '.state'
        self.current_step_ancestry = []
        self.next_step_index = 0
        self._load_state()

        if not cont:
            # Don't cleanup the tmp dir (which contains the lockfile)
            # in the constructor, this would be a problem for multiple
            # jobs starting at the same time on the same client. Instead
            # do the delete at the server side. We simply create the tmp
            # directory here if it does not already exist.
            if not os.path.exists(self.tmpdir):
                os.mkdir(self.tmpdir)

            results = os.path.join(self.autodir, 'results')
            if not os.path.exists(results):
                os.mkdir(results)

            download = os.path.join(self.testdir, 'download')
            if not os.path.exists(download):
                os.mkdir(download)

            # A fresh (non-continued) job wipes any stale result dir.
            if os.path.exists(self.resultdir):
                utils.system('rm -rf ' + self.resultdir)
            os.mkdir(self.resultdir)
            os.mkdir(self.sysinfodir)

            os.mkdir(os.path.join(self.resultdir, 'debug'))
            os.mkdir(os.path.join(self.resultdir, 'analysis'))

            # Keep a copy of the control file with the results.
            shutil.copyfile(self.control,
                            os.path.join(self.resultdir, 'control'))


        self.control = control
        self.jobtag = jobtag
        self.log_filename = self.DEFAULT_LOG_FILENAME
        self.container = None

        self.stdout = fd_stack.fd_stack(1, sys.stdout)
        self.stderr = fd_stack.fd_stack(2, sys.stderr)

        self._init_group_level()

        self.config = config.config(self)
        self.harness = harness.select(harness_type, self)
        self.profilers = profilers.profilers(self)

        # Best effort: boottool may be unconfigured/unavailable, in
        # which case self.bootloader is simply left unset.
        try:
            tool = self.config_get('boottool.executable')
            self.bootloader = boottool.boottool(tool)
        except:
            pass

        sysinfo.log_per_reboot_data(self.sysinfodir)

        if not cont:
            self.record('START', None, None)
            self._increment_group_level()

        self.harness.run_start()

        if use_external_logging:
            self.enable_external_logging()

        # load the max disk usage rate - default to no monitoring
        self.max_disk_usage_rate = self.get_state('__monitor_disk', default=0.0)
jadmanski8415f962008-05-06 20:38:53 +0000152
jadmanski8415f962008-05-06 20:38:53 +0000153
jadmanski0afbb632008-06-06 21:10:57 +0000154 def monitor_disk_usage(self, max_rate):
155 """\
156 Signal that the job should monitor disk space usage on /
157 and generate a warning if a test uses up disk space at a
158 rate exceeding 'max_rate'.
mbligh0692e472007-08-30 16:07:53 +0000159
jadmanski0afbb632008-06-06 21:10:57 +0000160 Parameters:
161 max_rate - the maximium allowed rate of disk consumption
162 during a test, in MB/hour, or 0 to indicate
163 no limit.
164 """
165 self.set_state('__monitor_disk', max_rate)
166 self.max_disk_usage_rate = max_rate
mbligh0692e472007-08-30 16:07:53 +0000167
168
    def relative_path(self, path):
        """\
        Return a path relative to the job results directory
        """
        # Assumes 'path' lies underneath self.resultdir; simply strips
        # the resultdir prefix rather than using os.path machinery.
        head = len(self.resultdir) + 1 # remove the / inbetween
        return path[head:]
mbligh362ab3d2007-08-30 11:24:04 +0000175
mblighcaa605c2006-10-02 00:37:35 +0000176
jadmanski0afbb632008-06-06 21:10:57 +0000177 def control_get(self):
178 return self.control
mbligh8d83cdc2007-12-03 18:09:18 +0000179
180
jadmanski0afbb632008-06-06 21:10:57 +0000181 def control_set(self, control):
182 self.control = os.path.abspath(control)
apwde1503a2006-10-10 08:34:21 +0000183
184
jadmanski0afbb632008-06-06 21:10:57 +0000185 def harness_select(self, which):
186 self.harness = harness.select(which, self)
apw059e1b12006-10-12 17:18:26 +0000187
188
jadmanski0afbb632008-06-06 21:10:57 +0000189 def config_set(self, name, value):
190 self.config.set(name, value)
apw059e1b12006-10-12 17:18:26 +0000191
mbligh1e8858e2006-11-24 22:18:35 +0000192
jadmanski0afbb632008-06-06 21:10:57 +0000193 def config_get(self, name):
194 return self.config.get(name)
mbligh72b88fc2006-12-16 18:41:35 +0000195
mblighc1f8ced2008-06-13 21:43:28 +0000196
jadmanski0afbb632008-06-06 21:10:57 +0000197 def setup_dirs(self, results_dir, tmp_dir):
198 if not tmp_dir:
199 tmp_dir = os.path.join(self.tmpdir, 'build')
200 if not os.path.exists(tmp_dir):
201 os.mkdir(tmp_dir)
202 if not os.path.isdir(tmp_dir):
203 e_msg = "Temp dir (%s) is not a dir - args backwards?" % self.tmpdir
204 raise ValueError(e_msg)
mbligh8baa2ea2006-12-17 23:01:24 +0000205
jadmanski0afbb632008-06-06 21:10:57 +0000206 # We label the first build "build" and then subsequent ones
207 # as "build.2", "build.3", etc. Whilst this is a little bit
208 # inconsistent, 99.9% of jobs will only have one build
209 # (that's not done as kernbench, sparse, or buildtest),
210 # so it works out much cleaner. One of life's comprimises.
211 if not results_dir:
212 results_dir = os.path.join(self.resultdir, 'build')
213 i = 2
214 while os.path.exists(results_dir):
215 results_dir = os.path.join(self.resultdir, 'build.%d' % i)
216 i += 1
217 if not os.path.exists(results_dir):
218 os.mkdir(results_dir)
mbligh8baa2ea2006-12-17 23:01:24 +0000219
jadmanski0afbb632008-06-06 21:10:57 +0000220 return (results_dir, tmp_dir)
mbligh8baa2ea2006-12-17 23:01:24 +0000221
222
jadmanski0afbb632008-06-06 21:10:57 +0000223 def xen(self, base_tree, results_dir = '', tmp_dir = '', leave = False, \
224 kjob = None ):
225 """Summon a xen object"""
226 (results_dir, tmp_dir) = self.setup_dirs(results_dir, tmp_dir)
227 build_dir = 'xen'
228 return xen.xen(self, base_tree, results_dir, tmp_dir, build_dir, leave, kjob)
mblighf4c35322006-03-13 01:01:10 +0000229
mblighcaa605c2006-10-02 00:37:35 +0000230
jadmanski0afbb632008-06-06 21:10:57 +0000231 def kernel(self, base_tree, results_dir = '', tmp_dir = '', leave = False):
232 """Summon a kernel object"""
233 (results_dir, tmp_dir) = self.setup_dirs(results_dir, tmp_dir)
234 build_dir = 'linux'
mbligh15440802008-06-06 22:48:19 +0000235 return kernel.auto_kernel(self, base_tree, results_dir, tmp_dir,
236 build_dir, leave)
mblighfadca202006-09-23 04:40:01 +0000237
mblighcaa605c2006-10-02 00:37:35 +0000238
jadmanski0afbb632008-06-06 21:10:57 +0000239 def barrier(self, *args, **kwds):
240 """Create a barrier object"""
241 return barrier.barrier(*args, **kwds)
mbligh4b089662006-06-14 22:34:58 +0000242
243
jadmanski0afbb632008-06-06 21:10:57 +0000244 def setup_dep(self, deps):
245 """Set up the dependencies for this test.
apwf1a81162006-04-25 10:10:29 +0000246
jadmanski0afbb632008-06-06 21:10:57 +0000247 deps is a list of libraries required for this test.
248 """
249 for dep in deps:
250 try:
251 os.chdir(os.path.join(self.autodir, 'deps', dep))
252 utils.system('./' + dep + '.py')
253 except:
254 err = "setting up dependency " + dep + "\n"
255 raise error.UnhandledError(err)
mblighcaa605c2006-10-02 00:37:35 +0000256
mbligh12a7df72006-10-06 03:54:33 +0000257
    def _runtest(self, url, tag, args, dargs):
        # Run a single test in a forked subprocess so a crashing test
        # cannot take down the whole job.  NOTE(review): the details of
        # subprocess bookkeeping live in parallel.fork_start/
        # fork_waitfor -- confirm behaviour there.
        try:
            l = lambda : test.runtest(self, url, tag, args, dargs)
            pid = parallel.fork_start(self.resultdir, l)
            parallel.fork_waitfor(self.resultdir, pid)
        except error.AutotestError:
            # Autotest's own errors already have the right semantics.
            raise
        except Exception, e:
            # Wrap anything else so callers see a uniform error type.
            msg = "Unhandled %s error occured during test\n"
            msg %= str(e.__class__.__name__)
            raise error.UnhandledError(msg)
mbligh7dd510c2007-11-13 17:11:22 +0000269
mbligh65938a22007-12-10 16:58:52 +0000270
    def run_test(self, url, *args, **dargs):
        """Summon a test object and run it.

        url
                url of the test to run
        Recognised keyword arguments (removed from dargs before the
        test sees them):
                tag - tag appended to the reported test name
                subdir_tag - tag appended to the result subdir only
                container - dict of cpuset container parameters
        Returns True on success, False if the test raised a TestError;
        any other exception is re-raised.
        """

        if not url:
            raise TypeError("Test name is invalid. "
                            "Switched arguments?")
        (group, testname) = test.testname(url)
        namelen = len(testname)
        dargs = dargs.copy()
        tntag = dargs.pop('tag', None)
        if tntag: # testname tag is included in reported test name
            testname += '.' + tntag
        subdir = testname
        sdtag = dargs.pop('subdir_tag', None)
        if sdtag: # subdir-only tag is not included in reports
            subdir = subdir + '.' + sdtag
        tag = subdir[namelen+1:] # '' if none

        # Each test run gets a fresh output dir; an existing one means
        # the same test/tag combination already ran in this job.
        outputdir = os.path.join(self.resultdir, subdir)
        if os.path.exists(outputdir):
            msg = ("%s already exists, test <%s> may have"
                   " already run with tag <%s>"
                   % (outputdir, testname, tag) )
            raise error.TestError(msg)
        os.mkdir(outputdir)

        # Optionally run the test inside a cpuset container; both the
        # new-style and old-style parameter names are accepted.
        container = dargs.pop('container', None)
        if container:
            cname = container.get('name', None)
            if not cname: # get old name
                cname = container.get('container_name', None)
            mbytes = container.get('mbytes', None)
            if not mbytes: # get old name
                mbytes = container.get('mem', None)
            cpus = container.get('cpus', None)
            if not cpus: # get old name
                cpus = container.get('cpu', None)
            root = container.get('root', None)
            self.new_container(mbytes=mbytes, cpus=cpus,
                               root=root, name=cname)
            # We are running in a container now...

        def log_warning(reason):
            self.record("WARN", subdir, testname, reason)
        # Wrap the group in the disk-usage watcher configured via
        # monitor_disk_usage() (rate 0 means no monitoring).
        @disk_usage_monitor.watch(log_warning, "/", self.max_disk_usage_rate)
        def group_func():
            try:
                self._runtest(url, tag, args, dargs)
            except error.TestNAError, detail:
                self.record('TEST_NA', subdir, testname,
                            str(detail))
                raise
            except Exception, detail:
                self.record('FAIL', subdir, testname,
                            str(detail))
                raise
            else:
                self.record('GOOD', subdir, testname,
                            'completed successfully')

        result, exc_info = self._rungroup(subdir, testname, group_func)
        if container:
            self.release_container()
        # TestError is reported via the status log, not re-raised.
        if exc_info and isinstance(exc_info[1], error.TestError):
            return False
        elif exc_info:
            raise exc_info[0], exc_info[1], exc_info[2]
        else:
            return True
apw0865f482006-03-30 18:50:19 +0000346
mblighd7fb4a62006-10-01 00:57:53 +0000347
    def _rungroup(self, subdir, testname, function, *args, **dargs):
        """\
        subdir:
                name of the group
        testname:
                name of the test to run, or support step
        function:
                subroutine to run
        *args:
                arguments for the function

        Returns a 2-tuple (result, exc_info) where result
        is the return value of function, and exc_info is
        the sys.exc_info() of the exception thrown by the
        function (which may be None).
        """

        result, exc_info = None, None
        try:
            # Bracket the function's status-log output with a
            # START/END pair at one deeper group level.
            self.record('START', subdir, testname)
            self._increment_group_level()
            result = function(*args, **dargs)
            self._decrement_group_level()
            self.record('END GOOD', subdir, testname)
        except error.TestNAError, e:
            # "Not applicable" is recorded but not treated as failure;
            # exc_info stays None.
            self._decrement_group_level()
            self.record('END TEST_NA', subdir, testname, str(e))
        except Exception, e:
            # Capture the exception for the caller to inspect/re-raise.
            exc_info = sys.exc_info()
            self._decrement_group_level()
            err_msg = str(e) + '\n' + traceback.format_exc()
            self.record('END FAIL', subdir, testname, err_msg)

        return result, exc_info
apw1da244b2007-09-27 17:18:01 +0000382
mbligh88ab90f2007-08-29 15:52:49 +0000383
    def run_group(self, function, *args, **dargs):
        """\
        function:
                subroutine to run
        *args:
                arguments for the function
        Runs 'function' as a named status-log group (named after the
        function, or the 'tag' keyword argument if given) with its own
        output directory.  Returns the function's return value;
        non-TestError exceptions are wrapped in error.TestError.
        """

        # Allow the tag for the group to be specified
        name = function.__name__
        tag = dargs.pop('tag', None)
        if tag:
            name = tag

        outputdir = os.path.join(self.resultdir, name)
        if os.path.exists(outputdir):
            msg = ("%s already exists, test <%s> may have"
                   " already run with tag <%s>"
                   % (outputdir, name, name) )
            raise error.TestError(msg)
        os.mkdir(outputdir)

        result, exc_info = self._rungroup(name, name, function, *args, **dargs)

        # if there was a non-TestError exception, raise it
        if exc_info and not isinstance(exc_info[1], error.TestError):
            err = ''.join(traceback.format_exception(*exc_info))
            raise error.TestError(name + ' failed\n' + err)

        # pass back the actual return value from the function
        return result
415
416
417 def new_container(self, mbytes=None, cpus=None, root=None, name=None):
418 if not autotest_utils.grep('cpuset', '/proc/filesystems'):
419 print "Containers not enabled by latest reboot"
420 return # containers weren't enabled in this kernel boot
421 pid = os.getpid()
422 if not name:
423 name = 'test%d' % pid # make arbitrary unique name
mbligh15440802008-06-06 22:48:19 +0000424 self.container = cpuset.cpuset(name, job_size=mbytes, job_pid=pid,
425 cpus=cpus, root=root)
jadmanski0afbb632008-06-06 21:10:57 +0000426 # This job's python shell is now running in the new container
427 # and all forked test processes will inherit that container
428
429
430 def release_container(self):
431 if self.container:
432 self.container.release()
433 self.container = None
434
mbligh68119582008-01-25 18:16:41 +0000435
jadmanski0afbb632008-06-06 21:10:57 +0000436 def cpu_count(self):
437 if self.container:
438 return len(self.container.cpus)
439 return autotest_utils.count_cpus() # use total system count
mbligh68119582008-01-25 18:16:41 +0000440
mbligh68119582008-01-25 18:16:41 +0000441
    # Check the passed kernel identifier against the command line
    # and the running kernel, abort the job on mismatch.
    def kernel_check_ident(self, expected_when, expected_id, subdir,
                           type = 'src', patches=[]):
        # NOTE(review): 'patches' is a mutable default argument; it is
        # only iterated here, never mutated, so this is safe but fragile.
        print (("POST BOOT: checking booted kernel " +
                "mark=%d identity='%s' type='%s'") %
               (expected_when, expected_id, type))

        running_id = autotest_utils.running_os_ident()

        cmdline = utils.read_one_line("/proc/cmdline")

        # The boot command line carries an IDENT=<mark> stamp saying
        # which boot this was expected to be (-1 if absent).
        find_sum = re.compile(r'.*IDENT=(\d+)')
        m = find_sum.match(cmdline)
        cmdline_when = -1
        if m:
            cmdline_when = int(m.groups()[0])

        # We have all the facts, see if they indicate we
        # booted the requested kernel or not.
        bad = False
        if (type == 'src' and expected_id != running_id or
            type == 'rpm' and
            not running_id.startswith(expected_id + '::')):
            print "check_kernel_ident: kernel identifier mismatch"
            bad = True
        if expected_when != cmdline_when:
            print "check_kernel_ident: kernel command line mismatch"
            bad = True

        if bad:
            print "   Expected Ident: " + expected_id
            print "    Running Ident: " + running_id
            print "    Expected Mark: %d" % (expected_when)
            print "Command Line Mark: %d" % (cmdline_when)
            print "     Command Line: " + cmdline

            raise error.JobError("boot failure", "reboot.verify")

        # Success: record the verification and close the 'reboot'
        # status-log group opened before the reboot.
        kernel_info = {'kernel': expected_id}
        for i, patch in enumerate(patches):
            kernel_info["patch%d" % i] = patch
        self.record('GOOD', subdir, 'reboot.verify', expected_id)
        self._decrement_group_level()
        self.record('END GOOD', subdir, 'reboot',
                    optional_fields=kernel_info)
apwce73d892007-09-25 16:53:05 +0000488
apwce73d892007-09-25 16:53:05 +0000489
jadmanski0afbb632008-06-06 21:10:57 +0000490 def filesystem(self, device, mountpoint = None, loop_size = 0):
491 if not mountpoint:
492 mountpoint = self.tmpdir
493 return filesystem.filesystem(self, device, mountpoint,loop_size)
apwce73d892007-09-25 16:53:05 +0000494
apwce73d892007-09-25 16:53:05 +0000495
    def enable_external_logging(self):
        # Hook for subclasses; invoked from __init__ when
        # use_external_logging is true.  No-op in the base class.
        pass
apwce73d892007-09-25 16:53:05 +0000498
mblighd7fb4a62006-10-01 00:57:53 +0000499
    def disable_external_logging(self):
        # Hook for subclasses; invoked from complete() during job
        # shutdown.  No-op in the base class.
        pass
mblighcaa62c22008-04-07 21:51:17 +0000502
503
    def reboot_setup(self):
        # Hook for subclasses; invoked at the start of reboot() before
        # anything is recorded.  No-op in the base class.
        pass
mblighcaa62c22008-04-07 21:51:17 +0000506
mblighcaa62c22008-04-07 21:51:17 +0000507
    def reboot(self, tag='autotest'):
        """Open a 'reboot' status-log group, arrange for kernel 'tag'
        to boot, and reboot the machine.  Does not return: quit()
        raises JobContinue so the job resumes after the reboot (the
        group is closed later by kernel_check_ident)."""
        self.reboot_setup()
        self.record('START', None, 'reboot')
        self._increment_group_level()
        self.record('GOOD', None, 'reboot.start')
        self.harness.run_reboot()
        # Either make 'tag' the permanent default boot entry or a
        # one-shot boot, per configuration.
        default = self.config_get('boot.set_default')
        if default:
            self.bootloader.set_default(tag)
        else:
            self.bootloader.boot_once(tag)
        # Fire the reboot detached from this process so we can still
        # shut down cleanly via quit() before it happens.
        cmd = "(sleep 5; reboot) </dev/null >/dev/null 2>&1 &"
        utils.system(cmd)
        self.quit()
mblighcaa605c2006-10-02 00:37:35 +0000522
mblighcaa605c2006-10-02 00:37:35 +0000523
jadmanski0afbb632008-06-06 21:10:57 +0000524 def noop(self, text):
525 print "job: noop: " + text
mblighcaa605c2006-10-02 00:37:35 +0000526
apw0865f482006-03-30 18:50:19 +0000527
    def parallel(self, *tasklist):
        """Run tasks in parallel"""

        # Fork one subprocess per task; each task logs to its own
        # per-task status file ("status.0", "status.1", ...) selected
        # by temporarily rebinding self.log_filename before the fork.
        pids = []
        old_log_filename = self.log_filename
        for i, task in enumerate(tasklist):
            self.log_filename = old_log_filename + (".%d" % i)
            # Each task is a (callable, arg, arg, ...) tuple.  The
            # lambda's late binding of 'task' is safe because
            # fork_start consumes it within this same iteration.
            task_func = lambda: task[0](*task[1:])
            pids.append(parallel.fork_start(self.resultdir, task_func))

        old_log_path = os.path.join(self.resultdir, old_log_filename)
        old_log = open(old_log_path, "a")
        exceptions = []
        for i, pid in enumerate(pids):
            # wait for the task to finish
            try:
                parallel.fork_waitfor(self.resultdir, pid)
            except Exception, e:
                # Collect failures; we still wait for the remaining
                # tasks and merge their logs before reporting.
                exceptions.append(e)
            # copy the logs from the subtask into the main log
            new_log_path = old_log_path + (".%d" % i)
            if os.path.exists(new_log_path):
                new_log = open(new_log_path)
                old_log.write(new_log.read())
                new_log.close()
                old_log.flush()
                os.remove(new_log_path)
        old_log.close()

        self.log_filename = old_log_filename

        # handle any exceptions raised by the parallel tasks
        if exceptions:
            msg = "%d task(s) failed" % len(exceptions)
            raise error.JobError(msg, str(exceptions), exceptions)
apw0865f482006-03-30 18:50:19 +0000563
mblighd509b712008-01-14 17:41:25 +0000564
    def quit(self):
        # XXX: should have a better name.
        # Pause the harness and abort the control file run;
        # JobContinue is presumably caught by the outer job runner so
        # execution resumes from saved state -- confirm at call sites.
        self.harness.run_pause()
        raise error.JobContinue("more to come")
mblighcaa605c2006-10-02 00:37:35 +0000569
apw0865f482006-03-30 18:50:19 +0000570
    def complete(self, status):
        """Clean up and exit"""
        # We are about to exit 'complete' so clean up the control file.
        try:
            os.unlink(self.state_file)
        except:
            # Best effort: the state file may never have been written.
            pass

        self.harness.run_complete()
        self.disable_external_logging()
        # Terminates the process with the given exit status.
        sys.exit(status)
mblighc0b10d32008-03-03 16:03:28 +0000582
apw0865f482006-03-30 18:50:19 +0000583
jadmanski0afbb632008-06-06 21:10:57 +0000584 def set_state(self, var, val):
585 # Deep copies make sure that the state can't be altered
586 # without it being re-written. Perf wise, deep copies
587 # are overshadowed by pickling/loading.
588 self.state[var] = copy.deepcopy(val)
589 pickle.dump(self.state, open(self.state_file, 'w'))
mblighcaa605c2006-10-02 00:37:35 +0000590
mbligh366ff1b2008-04-25 16:07:56 +0000591
mbligh6e83b6f2008-06-10 16:26:59 +0000592 def _load_state(self):
jadmanski0afbb632008-06-06 21:10:57 +0000593 assert not hasattr(self, "state")
594 try:
595 self.state = pickle.load(open(self.state_file, 'r'))
596 self.state_existed = True
597 except Exception:
598 print "Initializing the state engine."
599 self.state = {}
600 self.set_state('__steps', []) # writes pickle file
601 self.state_existed = False
mbligh366ff1b2008-04-25 16:07:56 +0000602
mbligh366ff1b2008-04-25 16:07:56 +0000603
jadmanski0afbb632008-06-06 21:10:57 +0000604 def get_state(self, var, default=None):
605 if var in self.state or default == None:
606 val = self.state[var]
607 else:
608 val = default
609 return copy.deepcopy(val)
mbligh366ff1b2008-04-25 16:07:56 +0000610
mbligh366ff1b2008-04-25 16:07:56 +0000611
    def __create_step_tuple(self, fn, args, dargs):
        # Build the pickle-able (ancestry, fn_name, args, dargs) record
        # that is stored in the persistent '__steps' state.
        # Legacy code passes in an array where the first arg is
        # the function or its name.
        if isinstance(fn, list):
            assert(len(args) == 0)
            assert(len(dargs) == 0)
            args = fn[1:]
            fn = fn[0]
        # Pickling actual functions is hairy, thus we have to call
        # them by name. Unfortunately, this means only functions
        # defined globally can be used as a next step.
        if callable(fn):
            fn = fn.__name__
        if not isinstance(fn, types.StringTypes):
            raise StepError("Next steps must be functions or "
                            "strings containing the function name")
        ancestry = copy.copy(self.current_step_ancestry)
        return (ancestry, fn, args, dargs)
mbligh366ff1b2008-04-25 16:07:56 +0000630
mbligh12a04cb2008-04-25 16:07:20 +0000631
jadmanski0afbb632008-06-06 21:10:57 +0000632 def next_step_append(self, fn, *args, **dargs):
633 """Define the next step and place it at the end"""
634 steps = self.get_state('__steps')
635 steps.append(self.__create_step_tuple(fn, args, dargs))
636 self.set_state('__steps', steps)
mbligh12a04cb2008-04-25 16:07:20 +0000637
apw0865f482006-03-30 18:50:19 +0000638
jadmanski0afbb632008-06-06 21:10:57 +0000639 def next_step(self, fn, *args, **dargs):
640 """Create a new step and place it after any steps added
641 while running the current step but before any steps added in
642 previous steps"""
643 steps = self.get_state('__steps')
644 steps.insert(self.next_step_index,
645 self.__create_step_tuple(fn, args, dargs))
646 self.next_step_index += 1
647 self.set_state('__steps', steps)
mblighcaa605c2006-10-02 00:37:35 +0000648
mbligh8f4d0432008-06-02 19:42:50 +0000649
jadmanski0afbb632008-06-06 21:10:57 +0000650 def next_step_prepend(self, fn, *args, **dargs):
651 """Insert a new step, executing first"""
652 steps = self.get_state('__steps')
653 steps.insert(0, self.__create_step_tuple(fn, args, dargs))
654 self.next_step_index += 1
655 self.set_state('__steps', steps)
mbligh8f4d0432008-06-02 19:42:50 +0000656
mbligh237bed32007-09-05 13:05:57 +0000657
    def _run_step_fn(self, local_vars, fn, args, dargs):
        """Run a (step) function within the given context"""
        # The step is known only by name (fn is a string -- see
        # __create_step_tuple), so it is resolved and invoked via exec
        # inside the reconstructed control-file namespace; the return
        # value is smuggled out through the __ret binding.
        local_vars['__args'] = args
        local_vars['__dargs'] = dargs
        exec('__ret = %s(*__args, **__dargs)' % fn, local_vars, local_vars)
        return local_vars['__ret']
mblighb274ef52008-06-02 19:40:01 +0000665
mblighb274ef52008-06-02 19:40:01 +0000666
    def _create_frame(self, global_vars, ancestry, fn_name):
        """Set up the environment like it would have been when this
        function was first defined.

        Child step engine 'implementations' must have 'return locals()'
        at end end of their steps. Because of this, we can call the
        parent function and get back all child functions (i.e. those
        defined within it).

        Unfortunately, the call stack of the function calling
        job.next_step might have been deeper than the function it
        added. In order to make sure that the environment is what it
        should be, we need to then pop off the frames we built until
        we find the frame where the function was first defined.

        Returns a (local_vars, ancestry) tuple: the namespace in which
        fn_name should run, and the (possibly trimmed) ancestry list.
        """

        # The copies ensure that the parent frames are not modified
        # while building child frames. This matters if we then
        # pop some frames in the next part of this function.
        current_frame = copy.copy(global_vars)
        frames = [current_frame]
        for steps_fn_name in ancestry:
            # Re-run each ancestor step function to rebuild the nested
            # namespaces it defined (it returns its locals()).
            ret = self._run_step_fn(current_frame, steps_fn_name, [], {})
            current_frame = copy.copy(ret)
            frames.append(current_frame)

        # Pop frames until fn_name is no longer identically defined in
        # the enclosing frame -- that's where it was first defined.
        while len(frames) > 2:
            if fn_name not in frames[-2]:
                break
            if frames[-2][fn_name] != frames[-1][fn_name]:
                break
            frames.pop()
            ancestry.pop()

        return (frames[-1], ancestry)
mblighb274ef52008-06-02 19:40:01 +0000701
mblighb274ef52008-06-02 19:40:01 +0000702
    def _add_step_init(self, local_vars, current_function):
        """If the function returned a dictionary that includes a
        function named 'step_init', prepend it to our list of steps.
        This will only get run the first time a function with a nested
        use of the step engine is run."""

        if (isinstance(local_vars, dict) and
            'step_init' in local_vars and
            callable(local_vars['step_init'])):
            # The init step is a child of the function
            # we were just running.
            self.current_step_ancestry.append(current_function)
            self.next_step_prepend('step_init')
mblighb274ef52008-06-02 19:40:01 +0000716
mblighb274ef52008-06-02 19:40:01 +0000717
    def step_engine(self):
        """the stepping engine -- if the control file defines
        step_init we will be using this engine to drive multiple runs.
        """
        """Do the next step"""

        # Set up the environment and then interpret the control file.
        # Some control files will have code outside of functions,
        # which means we need to have our state engine initialized
        # before reading in the file.
        global_control_vars = {'job': self}
        exec(JOB_PREAMBLE, global_control_vars, global_control_vars)
        execfile(self.control, global_control_vars, global_control_vars)

        # If we loaded in a mid-job state file, then we presumably
        # know what steps we have yet to run.
        if not self.state_existed:
            if global_control_vars.has_key('step_init'):
                self.next_step(global_control_vars['step_init'])

        # Iterate through the steps. If we reboot, we'll simply
        # continue iterating on the next step.
        while len(self.get_state('__steps')) > 0:
            steps = self.get_state('__steps')
            (ancestry, fn_name, args, dargs) = steps.pop(0)
            # Persist the dequeue immediately so a reboot mid-step
            # does not re-run this step.
            self.set_state('__steps', steps)

            self.next_step_index = 0
            # Rebuild the namespace the step was defined in, run it,
            # and pick up any nested step_init it may have defined.
            ret = self._create_frame(global_control_vars, ancestry, fn_name)
            local_vars, self.current_step_ancestry = ret
            local_vars = self._run_step_fn(local_vars, fn_name, args, dargs)
            self._add_step_init(local_vars, fn_name)
apw0865f482006-03-30 18:50:19 +0000750
apw0865f482006-03-30 18:50:19 +0000751
jadmanski0afbb632008-06-06 21:10:57 +0000752 def _init_group_level(self):
753 self.group_level = self.get_state("__group_level", default=0)
mblighcaa605c2006-10-02 00:37:35 +0000754
jadmanskia9c75c42008-05-01 22:05:31 +0000755
jadmanski0afbb632008-06-06 21:10:57 +0000756 def _increment_group_level(self):
757 self.group_level += 1
758 self.set_state("__group_level", self.group_level)
jadmanskia9c75c42008-05-01 22:05:31 +0000759
jadmanskia9c75c42008-05-01 22:05:31 +0000760
jadmanski0afbb632008-06-06 21:10:57 +0000761 def _decrement_group_level(self):
762 self.group_level -= 1
763 self.set_state("__group_level", self.group_level)
jadmanskia9c75c42008-05-01 22:05:31 +0000764
jadmanskia9c75c42008-05-01 22:05:31 +0000765
jadmanski0afbb632008-06-06 21:10:57 +0000766 def record(self, status_code, subdir, operation, status = '',
767 optional_fields=None):
768 """
769 Record job-level status
jadmanskia9c75c42008-05-01 22:05:31 +0000770
jadmanski0afbb632008-06-06 21:10:57 +0000771 The intent is to make this file both machine parseable and
772 human readable. That involves a little more complexity, but
773 really isn't all that bad ;-)
apw7db8d0b2006-10-09 08:10:25 +0000774
jadmanski0afbb632008-06-06 21:10:57 +0000775 Format is <status code>\t<subdir>\t<operation>\t<status>
mbligh09f288a2007-09-18 21:34:57 +0000776
jadmanski0afbb632008-06-06 21:10:57 +0000777 status code: (GOOD|WARN|FAIL|ABORT)
778 or START
779 or END (GOOD|WARN|FAIL|ABORT)
mbligh09f288a2007-09-18 21:34:57 +0000780
jadmanski0afbb632008-06-06 21:10:57 +0000781 subdir: MUST be a relevant subdirectory in the results,
782 or None, which will be represented as '----'
mbligh09f288a2007-09-18 21:34:57 +0000783
jadmanski0afbb632008-06-06 21:10:57 +0000784 operation: description of what you ran (e.g. "dbench", or
785 "mkfs -t foobar /dev/sda9")
mbligh09f288a2007-09-18 21:34:57 +0000786
jadmanski0afbb632008-06-06 21:10:57 +0000787 status: error message or "completed sucessfully"
mbligh09f288a2007-09-18 21:34:57 +0000788
jadmanski0afbb632008-06-06 21:10:57 +0000789 ------------------------------------------------------------
mbligh09f288a2007-09-18 21:34:57 +0000790
jadmanski0afbb632008-06-06 21:10:57 +0000791 Initial tabs indicate indent levels for grouping, and is
792 governed by self.group_level
mbligh09f288a2007-09-18 21:34:57 +0000793
jadmanski0afbb632008-06-06 21:10:57 +0000794 multiline messages have secondary lines prefaced by a double
795 space (' ')
796 """
mbligh09f288a2007-09-18 21:34:57 +0000797
jadmanski0afbb632008-06-06 21:10:57 +0000798 if subdir:
799 if re.match(r'[\n\t]', subdir):
mbligh15440802008-06-06 22:48:19 +0000800 raise ValueError("Invalid character in subdir string")
jadmanski0afbb632008-06-06 21:10:57 +0000801 substr = subdir
802 else:
803 substr = '----'
mbligh09f288a2007-09-18 21:34:57 +0000804
jadmanski0afbb632008-06-06 21:10:57 +0000805 if not logging.is_valid_status(status_code):
mbligh15440802008-06-06 22:48:19 +0000806 raise ValueError("Invalid status code supplied: %s" % status_code)
jadmanski0afbb632008-06-06 21:10:57 +0000807 if not operation:
808 operation = '----'
jadmanskia9c75c42008-05-01 22:05:31 +0000809
jadmanski0afbb632008-06-06 21:10:57 +0000810 if re.match(r'[\n\t]', operation):
mbligh15440802008-06-06 22:48:19 +0000811 raise ValueError("Invalid character in operation string")
jadmanski0afbb632008-06-06 21:10:57 +0000812 operation = operation.rstrip()
jadmanskia9c75c42008-05-01 22:05:31 +0000813
jadmanski0afbb632008-06-06 21:10:57 +0000814 if not optional_fields:
815 optional_fields = {}
jadmanskia9c75c42008-05-01 22:05:31 +0000816
jadmanski0afbb632008-06-06 21:10:57 +0000817 status = status.rstrip()
818 status = re.sub(r"\t", " ", status)
819 # Ensure any continuation lines are marked so we can
820 # detect them in the status file to ensure it is parsable.
mbligh15440802008-06-06 22:48:19 +0000821 status = re.sub(r"\n", "\n" + "\t" * self.group_level + " ", status)
mbligh09f288a2007-09-18 21:34:57 +0000822
jadmanski0afbb632008-06-06 21:10:57 +0000823 # Generate timestamps for inclusion in the logs
824 epoch_time = int(time.time()) # seconds since epoch, in UTC
825 local_time = time.localtime(epoch_time)
826 optional_fields["timestamp"] = str(epoch_time)
827 optional_fields["localtime"] = time.strftime("%b %d %H:%M:%S",
828 local_time)
mbligh30270302007-11-05 20:33:52 +0000829
jadmanski0afbb632008-06-06 21:10:57 +0000830 fields = [status_code, substr, operation]
831 fields += ["%s=%s" % x for x in optional_fields.iteritems()]
832 fields.append(status)
jadmanskia9c75c42008-05-01 22:05:31 +0000833
jadmanski0afbb632008-06-06 21:10:57 +0000834 msg = '\t'.join(str(x) for x in fields)
835 msg = '\t' * self.group_level + msg
apw7db8d0b2006-10-09 08:10:25 +0000836
jadmanski0afbb632008-06-06 21:10:57 +0000837 msg_tag = ""
838 if "." in self.log_filename:
839 msg_tag = self.log_filename.split(".", 1)[1]
mblighd528d302007-12-19 16:19:05 +0000840
mbligh15440802008-06-06 22:48:19 +0000841 self.harness.test_status_detail(status_code, substr, operation, status,
842 msg_tag)
jadmanski0afbb632008-06-06 21:10:57 +0000843 self.harness.test_status(msg, msg_tag)
mblighd528d302007-12-19 16:19:05 +0000844
jadmanski0afbb632008-06-06 21:10:57 +0000845 # log to stdout (if enabled)
846 #if self.log_filename == self.DEFAULT_LOG_FILENAME:
847 print msg
mblighd528d302007-12-19 16:19:05 +0000848
jadmanski0afbb632008-06-06 21:10:57 +0000849 # log to the "root" status log
850 status_file = os.path.join(self.resultdir, self.log_filename)
851 open(status_file, "a").write(msg + "\n")
mblighd528d302007-12-19 16:19:05 +0000852
jadmanski0afbb632008-06-06 21:10:57 +0000853 # log to the subdir status log (if subdir is set)
854 if subdir:
855 dir = os.path.join(self.resultdir, subdir)
mbligh15440802008-06-06 22:48:19 +0000856 status_file = os.path.join(dir, self.DEFAULT_LOG_FILENAME)
jadmanski0afbb632008-06-06 21:10:57 +0000857 open(status_file, "a").write(msg + "\n")
apwce9abe92006-04-27 14:14:04 +0000858
859
jadmanski8415f962008-05-06 20:38:53 +0000860class disk_usage_monitor:
jadmanski0afbb632008-06-06 21:10:57 +0000861 def __init__(self, logging_func, device, max_mb_per_hour):
862 self.func = logging_func
863 self.device = device
864 self.max_mb_per_hour = max_mb_per_hour
jadmanski8415f962008-05-06 20:38:53 +0000865
866
jadmanski0afbb632008-06-06 21:10:57 +0000867 def start(self):
868 self.initial_space = autotest_utils.freespace(self.device)
869 self.start_time = time.time()
jadmanski8415f962008-05-06 20:38:53 +0000870
871
jadmanski0afbb632008-06-06 21:10:57 +0000872 def stop(self):
873 # if no maximum usage rate was set, we don't need to
874 # generate any warnings
875 if not self.max_mb_per_hour:
876 return
jadmanski8415f962008-05-06 20:38:53 +0000877
jadmanski0afbb632008-06-06 21:10:57 +0000878 final_space = autotest_utils.freespace(self.device)
879 used_space = self.initial_space - final_space
880 stop_time = time.time()
881 total_time = stop_time - self.start_time
882 # round up the time to one minute, to keep extremely short
883 # tests from generating false positives due to short, badly
884 # timed bursts of activity
885 total_time = max(total_time, 60.0)
jadmanski8415f962008-05-06 20:38:53 +0000886
jadmanski0afbb632008-06-06 21:10:57 +0000887 # determine the usage rate
888 bytes_per_sec = used_space / total_time
889 mb_per_sec = bytes_per_sec / 1024**2
890 mb_per_hour = mb_per_sec * 60 * 60
jadmanski8415f962008-05-06 20:38:53 +0000891
jadmanski0afbb632008-06-06 21:10:57 +0000892 if mb_per_hour > self.max_mb_per_hour:
mbligh15440802008-06-06 22:48:19 +0000893 msg = ("disk space on %s was consumed at a rate of %.2f MB/hour")
jadmanski0afbb632008-06-06 21:10:57 +0000894 msg %= (self.device, mb_per_hour)
895 self.func(msg)
jadmanski8415f962008-05-06 20:38:53 +0000896
897
jadmanski0afbb632008-06-06 21:10:57 +0000898 @classmethod
899 def watch(cls, *monitor_args, **monitor_dargs):
900 """ Generic decorator to wrap a function call with the
901 standard create-monitor -> start -> call -> stop idiom."""
902 def decorator(func):
903 def watched_func(*args, **dargs):
904 monitor = cls(*monitor_args, **monitor_dargs)
905 monitor.start()
906 try:
907 func(*args, **dargs)
908 finally:
909 monitor.stop()
910 return watched_func
911 return decorator
jadmanski8415f962008-05-06 20:38:53 +0000912
913
mblighcaa62c22008-04-07 21:51:17 +0000914def runjob(control, cont = False, tag = "default", harness_type = '',
jadmanski0afbb632008-06-06 21:10:57 +0000915 use_external_logging = False):
916 """The main interface to this module
mblighc86b0b42006-07-28 17:35:28 +0000917
jadmanski0afbb632008-06-06 21:10:57 +0000918 control
919 The control file to use for this job.
920 cont
921 Whether this is the continuation of a previously started job
922 """
923 control = os.path.abspath(control)
924 state = control + '.state'
apwce9abe92006-04-27 14:14:04 +0000925
jadmanski0afbb632008-06-06 21:10:57 +0000926 # instantiate the job object ready for the control file.
927 myjob = None
928 try:
929 # Check that the control file is valid
930 if not os.path.exists(control):
mbligh15440802008-06-06 22:48:19 +0000931 raise error.JobError(control + ": control file not found")
apwce9abe92006-04-27 14:14:04 +0000932
jadmanski0afbb632008-06-06 21:10:57 +0000933 # When continuing, the job is complete when there is no
934 # state file, ensure we don't try and continue.
935 if cont and not os.path.exists(state):
936 raise error.JobComplete("all done")
937 if cont == False and os.path.exists(state):
938 os.unlink(state)
apwce9abe92006-04-27 14:14:04 +0000939
mbligh15440802008-06-06 22:48:19 +0000940 myjob = job(control, tag, cont, harness_type, use_external_logging)
apwce9abe92006-04-27 14:14:04 +0000941
jadmanski0afbb632008-06-06 21:10:57 +0000942 # Load in the users control file, may do any one of:
943 # 1) execute in toto
944 # 2) define steps, and select the first via next_step()
945 myjob.step_engine()
apwce9abe92006-04-27 14:14:04 +0000946
jadmanski0afbb632008-06-06 21:10:57 +0000947 except error.JobContinue:
948 sys.exit(5)
apwce9abe92006-04-27 14:14:04 +0000949
jadmanski0afbb632008-06-06 21:10:57 +0000950 except error.JobComplete:
951 sys.exit(1)
apwb832e1b2007-11-24 20:24:38 +0000952
jadmanski0afbb632008-06-06 21:10:57 +0000953 except error.JobError, instance:
954 print "JOB ERROR: " + instance.args[0]
955 if myjob:
956 command = None
957 if len(instance.args) > 1:
958 command = instance.args[1]
959 myjob.record('ABORT', None, command, instance.args[0])
960 myjob._decrement_group_level()
961 myjob.record('END ABORT', None, None)
962 assert(myjob.group_level == 0)
963 myjob.complete(1)
964 else:
965 sys.exit(1)
apwce9abe92006-04-27 14:14:04 +0000966
jadmanski0afbb632008-06-06 21:10:57 +0000967 except Exception, e:
968 msg = str(e) + '\n' + traceback.format_exc()
969 print "JOB ERROR: " + msg
970 if myjob:
971 myjob.record('ABORT', None, None, msg)
972 myjob._decrement_group_level()
973 myjob.record('END ABORT', None, None)
974 assert(myjob.group_level == 0)
975 myjob.complete(1)
976 else:
977 sys.exit(1)
mbligh892d37f2007-03-01 17:03:25 +0000978
jadmanski0afbb632008-06-06 21:10:57 +0000979 # If we get here, then we assume the job is complete and good.
980 myjob._decrement_group_level()
981 myjob.record('END GOOD', None, None)
982 assert(myjob.group_level == 0)
mbligh0144e5a2008-03-07 18:17:53 +0000983
jadmanski0afbb632008-06-06 21:10:57 +0000984 myjob.complete(0)
mblighcaa62c22008-04-07 21:51:17 +0000985
986
987# site_job.py may be nonexistent or empty; make sure that an appropriate
988# site_job class is created nevertheless.
try:
    from site_job import site_job
except ImportError:
    # no site-specific customization is available; fall back to an
    # empty subclass so the job class below always has a valid base
    class site_job(base_job):
        pass

# The concrete job class: base_job plus any site-specific additions.
class job(site_job):
    pass