blob: b25de5adc31fbd7e6e21ff28b8b1311d2f21797d [file] [log] [blame]
mblighc86b0b42006-07-28 17:35:28 +00001"""The main job wrapper
mbligha2508052006-05-28 21:29:53 +00002
mblighc86b0b42006-07-28 17:35:28 +00003This is the core infrastructure.
4"""
5
6__author__ = """Copyright Andy Whitcroft, Martin J. Bligh 2006"""
mbligha2508052006-05-28 21:29:53 +00007
mbligh8f243ec2006-10-10 05:55:49 +00008# standard stuff
mbligh366ff1b2008-04-25 16:07:56 +00009import os, sys, re, pickle, shutil, time, traceback, types, copy
mbligh302482e2008-05-01 20:06:16 +000010
mbligh8f243ec2006-10-10 05:55:49 +000011# autotest stuff
mblighc61fb362008-06-05 16:22:15 +000012from autotest_lib.client.bin import autotest_utils, parallel, kernel, xen
13from autotest_lib.client.bin import profilers, fd_stack, boottool, harness
14from autotest_lib.client.bin import config, sysinfo, cpuset, test, filesystem
mblighe829ba52008-06-03 15:04:08 +000015from autotest_lib.client.common_lib import error, barrier, logging, utils
mbligh302482e2008-05-01 20:06:16 +000016
# Code executed in every control-file / step namespace before the control
# file itself runs; star-imports make the error and autotest_utils names
# available unqualified to control files.
# BUGFIX: the error module lives under autotest_lib.client.common_lib (see
# the imports at the top of this file), not autotest_lib.common.common_lib.
JOB_PREAMBLE = """
from autotest_lib.client.common_lib.error import *
from autotest_lib.client.bin.autotest_utils import *
"""
21
class StepError(error.AutotestError):
    """Raised by the step engine when a step registration is invalid
    (e.g. a next step that is neither a function nor a function name)."""
mbligh12a04cb2008-04-25 16:07:20 +000024
25
class base_job(object):
    """The actual job against which we do everything.

    Properties:
            autodir
                    The top level autotest directory (/usr/local/autotest).
                    Comes from os.environ['AUTODIR'].
            bindir
                    <autodir>/bin/
            libdir
                    <autodir>/lib/
            testdir
                    <autodir>/tests/
            site_testdir
                    <autodir>/site_tests/
            profdir
                    <autodir>/profilers/
            tmpdir
                    <autodir>/tmp/
            resultdir
                    <autodir>/results/<jobtag>
            stdout
                    fd_stack object for stdout
            stderr
                    fd_stack object for stderr
            profilers
                    the profilers object for this job
            harness
                    the server harness object for this job
            config
                    the job configuration for this job
    """

    # Name of the per-job status log file (relative to resultdir);
    # parallel() appends ".<n>" suffixes per subtask.
    DEFAULT_LOG_FILENAME = "status"
mblighd528d302007-12-19 16:19:05 +000060
    def __init__(self, control, jobtag, cont, harness_type=None,
                 use_external_logging=False):
        """
        control
                The control file (pathname of)
        jobtag
                The job tag string (eg "default")
        cont
                If this is the continuation of this job
        harness_type
                An alternative server harness
        """
        # Derive the standard job directory layout from $AUTODIR.
        self.autodir = os.environ['AUTODIR']
        self.bindir = os.path.join(self.autodir, 'bin')
        self.libdir = os.path.join(self.autodir, 'lib')
        self.testdir = os.path.join(self.autodir, 'tests')
        self.site_testdir = os.path.join(self.autodir, 'site_tests')
        self.profdir = os.path.join(self.autodir, 'profilers')
        self.tmpdir = os.path.join(self.autodir, 'tmp')
        self.resultdir = os.path.join(self.autodir, 'results', jobtag)
        self.sysinfodir = os.path.join(self.resultdir, 'sysinfo')
        self.control = os.path.abspath(control)
        self.state_file = self.control + '.state'
        self.current_step_ancestry = []
        self.next_step_index = 0
        # Restore (or initialize) the pickled state dict that survives
        # reboots/continuations; sets self.state and self.state_existed.
        self._load_state()

        if not cont:
            # Don't cleanup the tmp dir (which contains the lockfile)
            # in the constructor, this would be a problem for multiple
            # jobs starting at the same time on the same client. Instead
            # do the delete at the server side. We simply create the tmp
            # directory here if it does not already exist.
            if not os.path.exists(self.tmpdir):
                os.mkdir(self.tmpdir)

            results = os.path.join(self.autodir, 'results')
            if not os.path.exists(results):
                os.mkdir(results)

            download = os.path.join(self.testdir, 'download')
            if not os.path.exists(download):
                os.mkdir(download)

            # Fresh (non-continuation) run: wipe any previous results
            # for this jobtag and rebuild the result tree.
            if os.path.exists(self.resultdir):
                utils.system('rm -rf ' + self.resultdir)
            os.mkdir(self.resultdir)
            os.mkdir(self.sysinfodir)

            os.mkdir(os.path.join(self.resultdir, 'debug'))
            os.mkdir(os.path.join(self.resultdir, 'analysis'))

            # Keep a copy of the control file alongside the results.
            shutil.copyfile(self.control,
                            os.path.join(self.resultdir, 'control'))

        self.control = control
        self.jobtag = jobtag
        self.log_filename = self.DEFAULT_LOG_FILENAME
        self.container = None

        self.stdout = fd_stack.fd_stack(1, sys.stdout)
        self.stderr = fd_stack.fd_stack(2, sys.stderr)

        self._init_group_level()

        self.config = config.config(self)
        self.harness = harness.select(harness_type, self)
        self.profilers = profilers.profilers(self)

        try:
            tool = self.config_get('boottool.executable')
            self.bootloader = boottool.boottool(tool)
        except:
            # NOTE(review): best-effort -- if boottool setup fails,
            # self.bootloader is simply left unset. The bare except also
            # swallows KeyboardInterrupt/SystemExit; confirm intent
            # before narrowing.
            pass

        sysinfo.log_per_reboot_data(self.sysinfodir)

        if not cont:
            # Open the top-level status group; closed when the job ends.
            self.record('START', None, None)
            self._increment_group_level()

        self.harness.run_start()

        if use_external_logging:
            self.enable_external_logging()

        # load the max disk usage rate - default to no monitoring
        self.max_disk_usage_rate = self.get_state('__monitor_disk', default=0.0)
jadmanski8415f962008-05-06 20:38:53 +0000152
jadmanski8415f962008-05-06 20:38:53 +0000153
jadmanski0afbb632008-06-06 21:10:57 +0000154 def monitor_disk_usage(self, max_rate):
155 """\
156 Signal that the job should monitor disk space usage on /
157 and generate a warning if a test uses up disk space at a
158 rate exceeding 'max_rate'.
mbligh0692e472007-08-30 16:07:53 +0000159
jadmanski0afbb632008-06-06 21:10:57 +0000160 Parameters:
161 max_rate - the maximium allowed rate of disk consumption
162 during a test, in MB/hour, or 0 to indicate
163 no limit.
164 """
165 self.set_state('__monitor_disk', max_rate)
166 self.max_disk_usage_rate = max_rate
mbligh0692e472007-08-30 16:07:53 +0000167
168
jadmanski0afbb632008-06-06 21:10:57 +0000169 def relative_path(self, path):
170 """\
171 Return a patch relative to the job results directory
172 """
173 head = len(self.resultdir) + 1 # remove the / inbetween
174 return path[head:]
mbligh362ab3d2007-08-30 11:24:04 +0000175
mblighcaa605c2006-10-02 00:37:35 +0000176
jadmanski0afbb632008-06-06 21:10:57 +0000177 def control_get(self):
178 return self.control
mbligh8d83cdc2007-12-03 18:09:18 +0000179
180
jadmanski0afbb632008-06-06 21:10:57 +0000181 def control_set(self, control):
182 self.control = os.path.abspath(control)
apwde1503a2006-10-10 08:34:21 +0000183
184
jadmanski0afbb632008-06-06 21:10:57 +0000185 def harness_select(self, which):
186 self.harness = harness.select(which, self)
apw059e1b12006-10-12 17:18:26 +0000187
188
jadmanski0afbb632008-06-06 21:10:57 +0000189 def config_set(self, name, value):
190 self.config.set(name, value)
apw059e1b12006-10-12 17:18:26 +0000191
mbligh1e8858e2006-11-24 22:18:35 +0000192
jadmanski0afbb632008-06-06 21:10:57 +0000193 def config_get(self, name):
194 return self.config.get(name)
mbligh72b88fc2006-12-16 18:41:35 +0000195
jadmanski0afbb632008-06-06 21:10:57 +0000196 def setup_dirs(self, results_dir, tmp_dir):
197 if not tmp_dir:
198 tmp_dir = os.path.join(self.tmpdir, 'build')
199 if not os.path.exists(tmp_dir):
200 os.mkdir(tmp_dir)
201 if not os.path.isdir(tmp_dir):
202 e_msg = "Temp dir (%s) is not a dir - args backwards?" % self.tmpdir
203 raise ValueError(e_msg)
mbligh8baa2ea2006-12-17 23:01:24 +0000204
jadmanski0afbb632008-06-06 21:10:57 +0000205 # We label the first build "build" and then subsequent ones
206 # as "build.2", "build.3", etc. Whilst this is a little bit
207 # inconsistent, 99.9% of jobs will only have one build
208 # (that's not done as kernbench, sparse, or buildtest),
209 # so it works out much cleaner. One of life's comprimises.
210 if not results_dir:
211 results_dir = os.path.join(self.resultdir, 'build')
212 i = 2
213 while os.path.exists(results_dir):
214 results_dir = os.path.join(self.resultdir, 'build.%d' % i)
215 i += 1
216 if not os.path.exists(results_dir):
217 os.mkdir(results_dir)
mbligh8baa2ea2006-12-17 23:01:24 +0000218
jadmanski0afbb632008-06-06 21:10:57 +0000219 return (results_dir, tmp_dir)
mbligh8baa2ea2006-12-17 23:01:24 +0000220
221
jadmanski0afbb632008-06-06 21:10:57 +0000222 def xen(self, base_tree, results_dir = '', tmp_dir = '', leave = False, \
223 kjob = None ):
224 """Summon a xen object"""
225 (results_dir, tmp_dir) = self.setup_dirs(results_dir, tmp_dir)
226 build_dir = 'xen'
227 return xen.xen(self, base_tree, results_dir, tmp_dir, build_dir, leave, kjob)
mblighf4c35322006-03-13 01:01:10 +0000228
mblighcaa605c2006-10-02 00:37:35 +0000229
jadmanski0afbb632008-06-06 21:10:57 +0000230 def kernel(self, base_tree, results_dir = '', tmp_dir = '', leave = False):
231 """Summon a kernel object"""
232 (results_dir, tmp_dir) = self.setup_dirs(results_dir, tmp_dir)
233 build_dir = 'linux'
mbligh15440802008-06-06 22:48:19 +0000234 return kernel.auto_kernel(self, base_tree, results_dir, tmp_dir,
235 build_dir, leave)
mblighfadca202006-09-23 04:40:01 +0000236
mblighcaa605c2006-10-02 00:37:35 +0000237
    def barrier(self, *args, **kwds):
        """Create a barrier object; all arguments are forwarded
        verbatim to barrier.barrier."""
        return barrier.barrier(*args, **kwds)
mbligh4b089662006-06-14 22:34:58 +0000241
242
jadmanski0afbb632008-06-06 21:10:57 +0000243 def setup_dep(self, deps):
244 """Set up the dependencies for this test.
apwf1a81162006-04-25 10:10:29 +0000245
jadmanski0afbb632008-06-06 21:10:57 +0000246 deps is a list of libraries required for this test.
247 """
248 for dep in deps:
249 try:
250 os.chdir(os.path.join(self.autodir, 'deps', dep))
251 utils.system('./' + dep + '.py')
252 except:
253 err = "setting up dependency " + dep + "\n"
254 raise error.UnhandledError(err)
mblighcaa605c2006-10-02 00:37:35 +0000255
mbligh12a7df72006-10-06 03:54:33 +0000256
    def _runtest(self, url, tag, args, dargs):
        # Run a single test in a forked subprocess so a crashing test
        # cannot take down the job itself.
        try:
            l = lambda : test.runtest(self, url, tag, args, dargs)
            pid = parallel.fork_start(self.resultdir, l)
            parallel.fork_waitfor(self.resultdir, pid)
        except error.AutotestError:
            # Autotest's own errors already carry the right semantics.
            raise
        except Exception, e:
            # Wrap anything else so the status log records it as an
            # unhandled failure with the original exception class name.
            msg = "Unhandled %s error occured during test\n"
            msg %= str(e.__class__.__name__)
            raise error.UnhandledError(msg)
mbligh7dd510c2007-11-13 17:11:22 +0000268
mbligh65938a22007-12-10 16:58:52 +0000269
    def run_test(self, url, *args, **dargs):
        """Summon a test object and run it.

        tag
                tag to add to testname
        url
                url of the test to run

        Recognized keyword arguments (popped before reaching the test):
        'tag' (testname tag), 'subdir_tag' (results-dir-only tag) and
        'container' (dict of cpuset parameters). Returns True on
        success, False when the test raised a TestError.
        """

        if not url:
            raise TypeError("Test name is invalid. "
                            "Switched arguments?")
        (group, testname) = test.testname(url)
        namelen = len(testname)
        dargs = dargs.copy()
        tntag = dargs.pop('tag', None)
        if tntag:  # testname tag is included in reported test name
            testname += '.' + tntag
        subdir = testname
        sdtag = dargs.pop('subdir_tag', None)
        if sdtag:  # subdir-only tag is not included in reports
            subdir = subdir + '.' + sdtag
        tag = subdir[namelen+1:]    # '' if none

        outputdir = os.path.join(self.resultdir, subdir)
        if os.path.exists(outputdir):
            msg = ("%s already exists, test <%s> may have"
                   " already run with tag <%s>"
                   % (outputdir, testname, tag) )
            raise error.TestError(msg)
        os.mkdir(outputdir)

        # Optionally run the test inside a cpuset container; both the
        # current and the legacy keyword names are accepted.
        container = dargs.pop('container', None)
        if container:
            cname = container.get('name', None)
            if not cname:   # get old name
                cname = container.get('container_name', None)
            mbytes = container.get('mbytes', None)
            if not mbytes:  # get old name
                mbytes = container.get('mem', None)
            cpus = container.get('cpus', None)
            if not cpus:    # get old name
                cpus = container.get('cpu', None)
            root = container.get('root', None)
            self.new_container(mbytes=mbytes, cpus=cpus,
                               root=root, name=cname)
            # We are running in a container now...

        def log_warning(reason):
            self.record("WARN", subdir, testname, reason)
        # disk_usage_monitor.watch (defined elsewhere in this file)
        # wraps group_func so that disk consumption beyond
        # self.max_disk_usage_rate is reported through log_warning.
        @disk_usage_monitor.watch(log_warning, "/", self.max_disk_usage_rate)
        def group_func():
            try:
                self._runtest(url, tag, args, dargs)
            except error.TestNAError, detail:
                self.record('TEST_NA', subdir, testname,
                            str(detail))
                raise
            except Exception, detail:
                self.record('FAIL', subdir, testname,
                            str(detail))
                raise
            else:
                self.record('GOOD', subdir, testname,
                            'completed successfully')

        result, exc_info = self._rungroup(subdir, testname, group_func)
        if container:
            self.release_container()
        # A TestError is reported but does not abort the job; any other
        # exception propagates to the caller with its original traceback.
        if exc_info and isinstance(exc_info[1], error.TestError):
            return False
        elif exc_info:
            raise exc_info[0], exc_info[1], exc_info[2]
        else:
            return True
apw0865f482006-03-30 18:50:19 +0000345
mblighd7fb4a62006-10-01 00:57:53 +0000346
    def _rungroup(self, subdir, testname, function, *args, **dargs):
        """\
        subdir:
                name of the group
        testname:
                name of the test to run, or support step
        function:
                subroutine to run
        *args:
                arguments for the function

        Returns a 2-tuple (result, exc_info) where result
        is the return value of function, and exc_info is
        the sys.exc_info() of the exception thrown by the
        function (which may be None).
        """

        result, exc_info = None, None
        try:
            self.record('START', subdir, testname)
            self._increment_group_level()
            result = function(*args, **dargs)
            self._decrement_group_level()
            self.record('END GOOD', subdir, testname)
        except error.TestNAError, e:
            # TEST_NA is not a failure: record it and swallow the
            # exception (exc_info stays None).
            self._decrement_group_level()
            self.record('END TEST_NA', subdir, testname, str(e))
        except Exception, e:
            # Capture the exception for the caller instead of raising,
            # so the END FAIL record is always written first.
            exc_info = sys.exc_info()
            self._decrement_group_level()
            err_msg = str(e) + '\n' + traceback.format_exc()
            self.record('END FAIL', subdir, testname, err_msg)

        return result, exc_info
apw1da244b2007-09-27 17:18:01 +0000381
mbligh88ab90f2007-08-29 15:52:49 +0000382
jadmanski0afbb632008-06-06 21:10:57 +0000383 def run_group(self, function, *args, **dargs):
384 """\
385 function:
386 subroutine to run
387 *args:
388 arguments for the function
389 """
apw08403ca2007-09-27 17:17:22 +0000390
jadmanski0afbb632008-06-06 21:10:57 +0000391 # Allow the tag for the group to be specified
392 name = function.__name__
393 tag = dargs.pop('tag', None)
394 if tag:
395 name = tag
mbligh88ab90f2007-08-29 15:52:49 +0000396
jadmanski0afbb632008-06-06 21:10:57 +0000397 outputdir = os.path.join(self.resultdir, name)
398 if os.path.exists(outputdir):
399 msg = ("%s already exists, test <%s> may have"
400 " already run with tag <%s>"
401 % (outputdir, name, name) )
402 raise error.TestError(msg)
403 os.mkdir(outputdir)
404
mbligh15440802008-06-06 22:48:19 +0000405 result, exc_info = self.__rungroup(name, name, function, *args, **dargs)
jadmanski0afbb632008-06-06 21:10:57 +0000406
407 # if there was a non-TestError exception, raise it
408 if exc_info and not isinstance(exc_info[1], error.TestError):
409 err = ''.join(traceback.format_exception(*exc_info))
410 raise error.TestError(name + ' failed\n' + err)
411
412 # pass back the actual return value from the function
413 return result
414
415
416 def new_container(self, mbytes=None, cpus=None, root=None, name=None):
417 if not autotest_utils.grep('cpuset', '/proc/filesystems'):
418 print "Containers not enabled by latest reboot"
419 return # containers weren't enabled in this kernel boot
420 pid = os.getpid()
421 if not name:
422 name = 'test%d' % pid # make arbitrary unique name
mbligh15440802008-06-06 22:48:19 +0000423 self.container = cpuset.cpuset(name, job_size=mbytes, job_pid=pid,
424 cpus=cpus, root=root)
jadmanski0afbb632008-06-06 21:10:57 +0000425 # This job's python shell is now running in the new container
426 # and all forked test processes will inherit that container
427
428
429 def release_container(self):
430 if self.container:
431 self.container.release()
432 self.container = None
433
mbligh68119582008-01-25 18:16:41 +0000434
jadmanski0afbb632008-06-06 21:10:57 +0000435 def cpu_count(self):
436 if self.container:
437 return len(self.container.cpus)
438 return autotest_utils.count_cpus() # use total system count
mbligh68119582008-01-25 18:16:41 +0000439
mbligh68119582008-01-25 18:16:41 +0000440
jadmanski0afbb632008-06-06 21:10:57 +0000441 # Check the passed kernel identifier against the command line
442 # and the running kernel, abort the job on missmatch.
443 def kernel_check_ident(self, expected_when, expected_id, subdir,
444 type = 'src', patches=[]):
445 print (("POST BOOT: checking booted kernel " +
446 "mark=%d identity='%s' type='%s'") %
447 (expected_when, expected_id, type))
mbligh68119582008-01-25 18:16:41 +0000448
jadmanski0afbb632008-06-06 21:10:57 +0000449 running_id = autotest_utils.running_os_ident()
mbligh68119582008-01-25 18:16:41 +0000450
jadmanski0afbb632008-06-06 21:10:57 +0000451 cmdline = utils.read_one_line("/proc/cmdline")
mbligh68119582008-01-25 18:16:41 +0000452
jadmanski0afbb632008-06-06 21:10:57 +0000453 find_sum = re.compile(r'.*IDENT=(\d+)')
454 m = find_sum.match(cmdline)
455 cmdline_when = -1
456 if m:
457 cmdline_when = int(m.groups()[0])
apwce73d892007-09-25 16:53:05 +0000458
jadmanski0afbb632008-06-06 21:10:57 +0000459 # We have all the facts, see if they indicate we
460 # booted the requested kernel or not.
461 bad = False
462 if (type == 'src' and expected_id != running_id or
463 type == 'rpm' and
464 not running_id.startswith(expected_id + '::')):
465 print "check_kernel_ident: kernel identifier mismatch"
466 bad = True
467 if expected_when != cmdline_when:
468 print "check_kernel_ident: kernel command line mismatch"
469 bad = True
apwce73d892007-09-25 16:53:05 +0000470
jadmanski0afbb632008-06-06 21:10:57 +0000471 if bad:
472 print " Expected Ident: " + expected_id
473 print " Running Ident: " + running_id
474 print " Expected Mark: %d" % (expected_when)
475 print "Command Line Mark: %d" % (cmdline_when)
476 print " Command Line: " + cmdline
apwce73d892007-09-25 16:53:05 +0000477
jadmanski0afbb632008-06-06 21:10:57 +0000478 raise error.JobError("boot failure", "reboot.verify")
apwce73d892007-09-25 16:53:05 +0000479
jadmanski0afbb632008-06-06 21:10:57 +0000480 kernel_info = {'kernel': expected_id}
481 for i, patch in enumerate(patches):
482 kernel_info["patch%d" % i] = patch
483 self.record('GOOD', subdir, 'reboot.verify', expected_id)
484 self._decrement_group_level()
485 self.record('END GOOD', subdir, 'reboot',
486 optional_fields=kernel_info)
apwce73d892007-09-25 16:53:05 +0000487
apwce73d892007-09-25 16:53:05 +0000488
jadmanski0afbb632008-06-06 21:10:57 +0000489 def filesystem(self, device, mountpoint = None, loop_size = 0):
490 if not mountpoint:
491 mountpoint = self.tmpdir
492 return filesystem.filesystem(self, device, mountpoint,loop_size)
apwce73d892007-09-25 16:53:05 +0000493
apwce73d892007-09-25 16:53:05 +0000494
    def enable_external_logging(self):
        """Start external logging. Hook for subclasses; the base
        implementation does nothing."""
        pass
apwce73d892007-09-25 16:53:05 +0000497
mblighd7fb4a62006-10-01 00:57:53 +0000498
    def disable_external_logging(self):
        """Stop external logging. Hook for subclasses; the base
        implementation does nothing."""
        pass
mblighcaa62c22008-04-07 21:51:17 +0000501
502
    def reboot_setup(self):
        """Pre-reboot hook called by reboot(); the base implementation
        does nothing."""
        pass
mblighcaa62c22008-04-07 21:51:17 +0000505
mblighcaa62c22008-04-07 21:51:17 +0000506
    def reboot(self, tag='autotest'):
        """Reboot the machine into the bootloader entry named 'tag'.

        Opens a 'reboot' status group (closed by kernel_check_ident
        after the machine comes back up), arms the bootloader, fires an
        asynchronous reboot and exits the job via quit() so it can be
        continued after boot.
        """
        self.reboot_setup()
        self.record('START', None, 'reboot')
        self._increment_group_level()
        self.record('GOOD', None, 'reboot.start')
        self.harness.run_reboot()
        default = self.config_get('boot.set_default')
        if default:
            self.bootloader.set_default(tag)
        else:
            self.bootloader.boot_once(tag)
        # Detach the actual reboot so our own exit is not killed mid-way.
        cmd = "(sleep 5; reboot) </dev/null >/dev/null 2>&1 &"
        utils.system(cmd)
        self.quit()
mblighcaa605c2006-10-02 00:37:35 +0000521
mblighcaa605c2006-10-02 00:37:35 +0000522
    def noop(self, text):
        """Print a no-op marker (used as a trivial step by the step
        engine)."""
        print "job: noop: " + text
mblighcaa605c2006-10-02 00:37:35 +0000525
apw0865f482006-03-30 18:50:19 +0000526
    def parallel(self, *tasklist):
        """Run tasks in parallel"""

        # Fork one subprocess per task; each child logs to its own
        # numbered status file (<status>.0, <status>.1, ...).
        pids = []
        old_log_filename = self.log_filename
        for i, task in enumerate(tasklist):
            self.log_filename = old_log_filename + (".%d" % i)
            # NOTE(review): the lambda closes over 'task' (late
            # binding); this is safe only if fork_start invokes it
            # before the loop advances -- confirm in parallel.py.
            task_func = lambda: task[0](*task[1:])
            pids.append(parallel.fork_start(self.resultdir, task_func))

        # Wait for the children, splicing each child's log back into
        # the main status log in task order.
        old_log_path = os.path.join(self.resultdir, old_log_filename)
        old_log = open(old_log_path, "a")
        exceptions = []
        for i, pid in enumerate(pids):
            # wait for the task to finish
            try:
                parallel.fork_waitfor(self.resultdir, pid)
            except Exception, e:
                # Collect failures; keep waiting for remaining tasks.
                exceptions.append(e)
            # copy the logs from the subtask into the main log
            new_log_path = old_log_path + (".%d" % i)
            if os.path.exists(new_log_path):
                new_log = open(new_log_path)
                old_log.write(new_log.read())
                new_log.close()
                old_log.flush()
                os.remove(new_log_path)
        old_log.close()

        self.log_filename = old_log_filename

        # handle any exceptions raised by the parallel tasks
        if exceptions:
            msg = "%d task(s) failed" % len(exceptions)
            raise error.JobError(msg, str(exceptions), exceptions)
apw0865f482006-03-30 18:50:19 +0000562
mblighd509b712008-01-14 17:41:25 +0000563
    def quit(self):
        # XXX: should have a better name.
        # Tell the harness we are pausing, then unwind the job with the
        # JobContinue control-flow exception so the autotest entry point
        # can resume the job later (e.g. after a reboot).
        self.harness.run_pause()
        raise error.JobContinue("more to come")
mblighcaa605c2006-10-02 00:37:35 +0000568
apw0865f482006-03-30 18:50:19 +0000569
jadmanski0afbb632008-06-06 21:10:57 +0000570 def complete(self, status):
571 """Clean up and exit"""
572 # We are about to exit 'complete' so clean up the control file.
573 try:
574 os.unlink(self.state_file)
575 except:
576 pass
mblighcaa605c2006-10-02 00:37:35 +0000577
jadmanski0afbb632008-06-06 21:10:57 +0000578 self.harness.run_complete()
579 self.disable_external_logging()
580 sys.exit(status)
mblighc0b10d32008-03-03 16:03:28 +0000581
apw0865f482006-03-30 18:50:19 +0000582
jadmanski0afbb632008-06-06 21:10:57 +0000583 def set_state(self, var, val):
584 # Deep copies make sure that the state can't be altered
585 # without it being re-written. Perf wise, deep copies
586 # are overshadowed by pickling/loading.
587 self.state[var] = copy.deepcopy(val)
588 pickle.dump(self.state, open(self.state_file, 'w'))
mblighcaa605c2006-10-02 00:37:35 +0000589
mbligh366ff1b2008-04-25 16:07:56 +0000590
mbligh6e83b6f2008-06-10 16:26:59 +0000591 def _load_state(self):
jadmanski0afbb632008-06-06 21:10:57 +0000592 assert not hasattr(self, "state")
593 try:
594 self.state = pickle.load(open(self.state_file, 'r'))
595 self.state_existed = True
596 except Exception:
597 print "Initializing the state engine."
598 self.state = {}
599 self.set_state('__steps', []) # writes pickle file
600 self.state_existed = False
mbligh366ff1b2008-04-25 16:07:56 +0000601
mbligh366ff1b2008-04-25 16:07:56 +0000602
jadmanski0afbb632008-06-06 21:10:57 +0000603 def get_state(self, var, default=None):
604 if var in self.state or default == None:
605 val = self.state[var]
606 else:
607 val = default
608 return copy.deepcopy(val)
mbligh366ff1b2008-04-25 16:07:56 +0000609
mbligh366ff1b2008-04-25 16:07:56 +0000610
    def __create_step_tuple(self, fn, args, dargs):
        # Normalize a next-step registration into the pickleable tuple
        # (ancestry, fn_name, args, dargs) stored under '__steps'.
        # Legacy code passes in an array where the first arg is
        # the function or its name.
        if isinstance(fn, list):
            assert(len(args) == 0)
            assert(len(dargs) == 0)
            args = fn[1:]
            fn = fn[0]
        # Pickling actual functions is hairy, thus we have to call
        # them by name. Unfortunately, this means only functions
        # defined globally can be used as a next step.
        if callable(fn):
            fn = fn.__name__
        if not isinstance(fn, types.StringTypes):
            raise StepError("Next steps must be functions or "
                            "strings containing the function name")
        # Copy the ancestry so later mutation of the job's current
        # ancestry does not retroactively change queued steps.
        ancestry = copy.copy(self.current_step_ancestry)
        return (ancestry, fn, args, dargs)
mbligh366ff1b2008-04-25 16:07:56 +0000629
mbligh12a04cb2008-04-25 16:07:20 +0000630
jadmanski0afbb632008-06-06 21:10:57 +0000631 def next_step_append(self, fn, *args, **dargs):
632 """Define the next step and place it at the end"""
633 steps = self.get_state('__steps')
634 steps.append(self.__create_step_tuple(fn, args, dargs))
635 self.set_state('__steps', steps)
mbligh12a04cb2008-04-25 16:07:20 +0000636
apw0865f482006-03-30 18:50:19 +0000637
jadmanski0afbb632008-06-06 21:10:57 +0000638 def next_step(self, fn, *args, **dargs):
639 """Create a new step and place it after any steps added
640 while running the current step but before any steps added in
641 previous steps"""
642 steps = self.get_state('__steps')
643 steps.insert(self.next_step_index,
644 self.__create_step_tuple(fn, args, dargs))
645 self.next_step_index += 1
646 self.set_state('__steps', steps)
mblighcaa605c2006-10-02 00:37:35 +0000647
mbligh8f4d0432008-06-02 19:42:50 +0000648
jadmanski0afbb632008-06-06 21:10:57 +0000649 def next_step_prepend(self, fn, *args, **dargs):
650 """Insert a new step, executing first"""
651 steps = self.get_state('__steps')
652 steps.insert(0, self.__create_step_tuple(fn, args, dargs))
653 self.next_step_index += 1
654 self.set_state('__steps', steps)
mbligh8f4d0432008-06-02 19:42:50 +0000655
mbligh237bed32007-09-05 13:05:57 +0000656
    def _run_step_fn(self, local_vars, fn, args, dargs):
        """Run a (step) function within the given context"""

        # The step function is only known by name, so it is invoked via
        # exec inside the step namespace; the arguments are smuggled in
        # through the reserved __args/__dargs slots. NOTE: the control
        # file content is trusted here -- exec runs it verbatim.
        local_vars['__args'] = args
        local_vars['__dargs'] = dargs
        exec('__ret = %s(*__args, **__dargs)' % fn, local_vars, local_vars)
        return local_vars['__ret']
mblighb274ef52008-06-02 19:40:01 +0000664
mblighb274ef52008-06-02 19:40:01 +0000665
    def _create_frame(self, global_vars, ancestry, fn_name):
        """Set up the environment like it would have been when this
        function was first defined.

        Child step engine 'implementations' must have 'return locals()'
        at the end of their steps. Because of this, we can call the
        parent function and get back all child functions (i.e. those
        defined within it).

        Unfortunately, the call stack of the function calling
        job.next_step might have been deeper than the function it
        added. In order to make sure that the environment is what it
        should be, we need to then pop off the frames we built until
        we find the frame where the function was first defined."""

        # The copies ensure that the parent frames are not modified
        # while building child frames. This matters if we then
        # pop some frames in the next part of this function.
        current_frame = copy.copy(global_vars)
        frames = [current_frame]
        for steps_fn_name in ancestry:
            ret = self._run_step_fn(current_frame, steps_fn_name, [], {})
            current_frame = copy.copy(ret)
            frames.append(current_frame)

        # Pop frames while the target function is defined identically
        # in the two topmost frames -- i.e. it actually originates at a
        # shallower nesting level than the full ancestry suggests.
        while len(frames) > 2:
            if fn_name not in frames[-2]:
                break
            if frames[-2][fn_name] != frames[-1][fn_name]:
                break
            frames.pop()
            ancestry.pop()

        return (frames[-1], ancestry)
mblighb274ef52008-06-02 19:40:01 +0000700
mblighb274ef52008-06-02 19:40:01 +0000701
jadmanski0afbb632008-06-06 21:10:57 +0000702 def _add_step_init(self, local_vars, current_function):
703 """If the function returned a dictionary that includes a
704 function named 'step_init', prepend it to our list of steps.
705 This will only get run the first time a function with a nested
706 use of the step engine is run."""
mblighb274ef52008-06-02 19:40:01 +0000707
jadmanski0afbb632008-06-06 21:10:57 +0000708 if (isinstance(local_vars, dict) and
709 'step_init' in local_vars and
710 callable(local_vars['step_init'])):
711 # The init step is a child of the function
712 # we were just running.
713 self.current_step_ancestry.append(current_function)
714 self.next_step_prepend('step_init')
mblighb274ef52008-06-02 19:40:01 +0000715
mblighb274ef52008-06-02 19:40:01 +0000716
    def step_engine(self):
        """The stepping engine -- if the control file defines step_init
        we will be using this engine to drive multiple runs; each call
        performs the next pending step."""

        # Set up the environment and then interpret the control file.
        # Some control files will have code outside of functions,
        # which means we need to have our state engine initialized
        # before reading in the file.
        global_control_vars = {'job': self}
        exec(JOB_PREAMBLE, global_control_vars, global_control_vars)
        execfile(self.control, global_control_vars, global_control_vars)

        # If we loaded in a mid-job state file, then we presumably
        # know what steps we have yet to run.
        if not self.state_existed:
            if global_control_vars.has_key('step_init'):
                self.next_step(global_control_vars['step_init'])

        # Iterate through the steps. If we reboot, we'll simply
        # continue iterating on the next step.
        while len(self.get_state('__steps')) > 0:
            steps = self.get_state('__steps')
            (ancestry, fn_name, args, dargs) = steps.pop(0)
            # Persist the shortened queue *before* running the step so
            # a reboot mid-step does not re-run it.
            self.set_state('__steps', steps)

            self.next_step_index = 0
            # Rebuild the namespace in which the step was defined, then
            # run it and pick up any nested step_init it produced.
            ret = self._create_frame(global_control_vars, ancestry, fn_name)
            local_vars, self.current_step_ancestry = ret
            local_vars = self._run_step_fn(local_vars, fn_name, args, dargs)
            self._add_step_init(local_vars, fn_name)
apw0865f482006-03-30 18:50:19 +0000749
apw0865f482006-03-30 18:50:19 +0000750
jadmanski0afbb632008-06-06 21:10:57 +0000751 def _init_group_level(self):
752 self.group_level = self.get_state("__group_level", default=0)
mblighcaa605c2006-10-02 00:37:35 +0000753
jadmanskia9c75c42008-05-01 22:05:31 +0000754
jadmanski0afbb632008-06-06 21:10:57 +0000755 def _increment_group_level(self):
756 self.group_level += 1
757 self.set_state("__group_level", self.group_level)
jadmanskia9c75c42008-05-01 22:05:31 +0000758
jadmanskia9c75c42008-05-01 22:05:31 +0000759
jadmanski0afbb632008-06-06 21:10:57 +0000760 def _decrement_group_level(self):
761 self.group_level -= 1
762 self.set_state("__group_level", self.group_level)
jadmanskia9c75c42008-05-01 22:05:31 +0000763
jadmanskia9c75c42008-05-01 22:05:31 +0000764
jadmanski0afbb632008-06-06 21:10:57 +0000765 def record(self, status_code, subdir, operation, status = '',
766 optional_fields=None):
767 """
768 Record job-level status
jadmanskia9c75c42008-05-01 22:05:31 +0000769
jadmanski0afbb632008-06-06 21:10:57 +0000770 The intent is to make this file both machine parseable and
771 human readable. That involves a little more complexity, but
772 really isn't all that bad ;-)
apw7db8d0b2006-10-09 08:10:25 +0000773
jadmanski0afbb632008-06-06 21:10:57 +0000774 Format is <status code>\t<subdir>\t<operation>\t<status>
mbligh09f288a2007-09-18 21:34:57 +0000775
jadmanski0afbb632008-06-06 21:10:57 +0000776 status code: (GOOD|WARN|FAIL|ABORT)
777 or START
778 or END (GOOD|WARN|FAIL|ABORT)
mbligh09f288a2007-09-18 21:34:57 +0000779
jadmanski0afbb632008-06-06 21:10:57 +0000780 subdir: MUST be a relevant subdirectory in the results,
781 or None, which will be represented as '----'
mbligh09f288a2007-09-18 21:34:57 +0000782
jadmanski0afbb632008-06-06 21:10:57 +0000783 operation: description of what you ran (e.g. "dbench", or
784 "mkfs -t foobar /dev/sda9")
mbligh09f288a2007-09-18 21:34:57 +0000785
jadmanski0afbb632008-06-06 21:10:57 +0000786 status: error message or "completed sucessfully"
mbligh09f288a2007-09-18 21:34:57 +0000787
jadmanski0afbb632008-06-06 21:10:57 +0000788 ------------------------------------------------------------
mbligh09f288a2007-09-18 21:34:57 +0000789
jadmanski0afbb632008-06-06 21:10:57 +0000790 Initial tabs indicate indent levels for grouping, and is
791 governed by self.group_level
mbligh09f288a2007-09-18 21:34:57 +0000792
jadmanski0afbb632008-06-06 21:10:57 +0000793 multiline messages have secondary lines prefaced by a double
794 space (' ')
795 """
mbligh09f288a2007-09-18 21:34:57 +0000796
jadmanski0afbb632008-06-06 21:10:57 +0000797 if subdir:
798 if re.match(r'[\n\t]', subdir):
mbligh15440802008-06-06 22:48:19 +0000799 raise ValueError("Invalid character in subdir string")
jadmanski0afbb632008-06-06 21:10:57 +0000800 substr = subdir
801 else:
802 substr = '----'
mbligh09f288a2007-09-18 21:34:57 +0000803
jadmanski0afbb632008-06-06 21:10:57 +0000804 if not logging.is_valid_status(status_code):
mbligh15440802008-06-06 22:48:19 +0000805 raise ValueError("Invalid status code supplied: %s" % status_code)
jadmanski0afbb632008-06-06 21:10:57 +0000806 if not operation:
807 operation = '----'
jadmanskia9c75c42008-05-01 22:05:31 +0000808
jadmanski0afbb632008-06-06 21:10:57 +0000809 if re.match(r'[\n\t]', operation):
mbligh15440802008-06-06 22:48:19 +0000810 raise ValueError("Invalid character in operation string")
jadmanski0afbb632008-06-06 21:10:57 +0000811 operation = operation.rstrip()
jadmanskia9c75c42008-05-01 22:05:31 +0000812
jadmanski0afbb632008-06-06 21:10:57 +0000813 if not optional_fields:
814 optional_fields = {}
jadmanskia9c75c42008-05-01 22:05:31 +0000815
jadmanski0afbb632008-06-06 21:10:57 +0000816 status = status.rstrip()
817 status = re.sub(r"\t", " ", status)
818 # Ensure any continuation lines are marked so we can
819 # detect them in the status file to ensure it is parsable.
mbligh15440802008-06-06 22:48:19 +0000820 status = re.sub(r"\n", "\n" + "\t" * self.group_level + " ", status)
mbligh09f288a2007-09-18 21:34:57 +0000821
jadmanski0afbb632008-06-06 21:10:57 +0000822 # Generate timestamps for inclusion in the logs
823 epoch_time = int(time.time()) # seconds since epoch, in UTC
824 local_time = time.localtime(epoch_time)
825 optional_fields["timestamp"] = str(epoch_time)
826 optional_fields["localtime"] = time.strftime("%b %d %H:%M:%S",
827 local_time)
mbligh30270302007-11-05 20:33:52 +0000828
jadmanski0afbb632008-06-06 21:10:57 +0000829 fields = [status_code, substr, operation]
830 fields += ["%s=%s" % x for x in optional_fields.iteritems()]
831 fields.append(status)
jadmanskia9c75c42008-05-01 22:05:31 +0000832
jadmanski0afbb632008-06-06 21:10:57 +0000833 msg = '\t'.join(str(x) for x in fields)
834 msg = '\t' * self.group_level + msg
apw7db8d0b2006-10-09 08:10:25 +0000835
jadmanski0afbb632008-06-06 21:10:57 +0000836 msg_tag = ""
837 if "." in self.log_filename:
838 msg_tag = self.log_filename.split(".", 1)[1]
mblighd528d302007-12-19 16:19:05 +0000839
mbligh15440802008-06-06 22:48:19 +0000840 self.harness.test_status_detail(status_code, substr, operation, status,
841 msg_tag)
jadmanski0afbb632008-06-06 21:10:57 +0000842 self.harness.test_status(msg, msg_tag)
mblighd528d302007-12-19 16:19:05 +0000843
jadmanski0afbb632008-06-06 21:10:57 +0000844 # log to stdout (if enabled)
845 #if self.log_filename == self.DEFAULT_LOG_FILENAME:
846 print msg
mblighd528d302007-12-19 16:19:05 +0000847
jadmanski0afbb632008-06-06 21:10:57 +0000848 # log to the "root" status log
849 status_file = os.path.join(self.resultdir, self.log_filename)
850 open(status_file, "a").write(msg + "\n")
mblighd528d302007-12-19 16:19:05 +0000851
jadmanski0afbb632008-06-06 21:10:57 +0000852 # log to the subdir status log (if subdir is set)
853 if subdir:
854 dir = os.path.join(self.resultdir, subdir)
mbligh15440802008-06-06 22:48:19 +0000855 status_file = os.path.join(dir, self.DEFAULT_LOG_FILENAME)
jadmanski0afbb632008-06-06 21:10:57 +0000856 open(status_file, "a").write(msg + "\n")
apwce9abe92006-04-27 14:14:04 +0000857
858
jadmanski8415f962008-05-06 20:38:53 +0000859class disk_usage_monitor:
jadmanski0afbb632008-06-06 21:10:57 +0000860 def __init__(self, logging_func, device, max_mb_per_hour):
861 self.func = logging_func
862 self.device = device
863 self.max_mb_per_hour = max_mb_per_hour
jadmanski8415f962008-05-06 20:38:53 +0000864
865
jadmanski0afbb632008-06-06 21:10:57 +0000866 def start(self):
867 self.initial_space = autotest_utils.freespace(self.device)
868 self.start_time = time.time()
jadmanski8415f962008-05-06 20:38:53 +0000869
870
jadmanski0afbb632008-06-06 21:10:57 +0000871 def stop(self):
872 # if no maximum usage rate was set, we don't need to
873 # generate any warnings
874 if not self.max_mb_per_hour:
875 return
jadmanski8415f962008-05-06 20:38:53 +0000876
jadmanski0afbb632008-06-06 21:10:57 +0000877 final_space = autotest_utils.freespace(self.device)
878 used_space = self.initial_space - final_space
879 stop_time = time.time()
880 total_time = stop_time - self.start_time
881 # round up the time to one minute, to keep extremely short
882 # tests from generating false positives due to short, badly
883 # timed bursts of activity
884 total_time = max(total_time, 60.0)
jadmanski8415f962008-05-06 20:38:53 +0000885
jadmanski0afbb632008-06-06 21:10:57 +0000886 # determine the usage rate
887 bytes_per_sec = used_space / total_time
888 mb_per_sec = bytes_per_sec / 1024**2
889 mb_per_hour = mb_per_sec * 60 * 60
jadmanski8415f962008-05-06 20:38:53 +0000890
jadmanski0afbb632008-06-06 21:10:57 +0000891 if mb_per_hour > self.max_mb_per_hour:
mbligh15440802008-06-06 22:48:19 +0000892 msg = ("disk space on %s was consumed at a rate of %.2f MB/hour")
jadmanski0afbb632008-06-06 21:10:57 +0000893 msg %= (self.device, mb_per_hour)
894 self.func(msg)
jadmanski8415f962008-05-06 20:38:53 +0000895
896
jadmanski0afbb632008-06-06 21:10:57 +0000897 @classmethod
898 def watch(cls, *monitor_args, **monitor_dargs):
899 """ Generic decorator to wrap a function call with the
900 standard create-monitor -> start -> call -> stop idiom."""
901 def decorator(func):
902 def watched_func(*args, **dargs):
903 monitor = cls(*monitor_args, **monitor_dargs)
904 monitor.start()
905 try:
906 func(*args, **dargs)
907 finally:
908 monitor.stop()
909 return watched_func
910 return decorator
jadmanski8415f962008-05-06 20:38:53 +0000911
912
mblighcaa62c22008-04-07 21:51:17 +0000913def runjob(control, cont = False, tag = "default", harness_type = '',
jadmanski0afbb632008-06-06 21:10:57 +0000914 use_external_logging = False):
915 """The main interface to this module
mblighc86b0b42006-07-28 17:35:28 +0000916
jadmanski0afbb632008-06-06 21:10:57 +0000917 control
918 The control file to use for this job.
919 cont
920 Whether this is the continuation of a previously started job
921 """
922 control = os.path.abspath(control)
923 state = control + '.state'
apwce9abe92006-04-27 14:14:04 +0000924
jadmanski0afbb632008-06-06 21:10:57 +0000925 # instantiate the job object ready for the control file.
926 myjob = None
927 try:
928 # Check that the control file is valid
929 if not os.path.exists(control):
mbligh15440802008-06-06 22:48:19 +0000930 raise error.JobError(control + ": control file not found")
apwce9abe92006-04-27 14:14:04 +0000931
jadmanski0afbb632008-06-06 21:10:57 +0000932 # When continuing, the job is complete when there is no
933 # state file, ensure we don't try and continue.
934 if cont and not os.path.exists(state):
935 raise error.JobComplete("all done")
936 if cont == False and os.path.exists(state):
937 os.unlink(state)
apwce9abe92006-04-27 14:14:04 +0000938
mbligh15440802008-06-06 22:48:19 +0000939 myjob = job(control, tag, cont, harness_type, use_external_logging)
apwce9abe92006-04-27 14:14:04 +0000940
jadmanski0afbb632008-06-06 21:10:57 +0000941 # Load in the users control file, may do any one of:
942 # 1) execute in toto
943 # 2) define steps, and select the first via next_step()
944 myjob.step_engine()
apwce9abe92006-04-27 14:14:04 +0000945
jadmanski0afbb632008-06-06 21:10:57 +0000946 except error.JobContinue:
947 sys.exit(5)
apwce9abe92006-04-27 14:14:04 +0000948
jadmanski0afbb632008-06-06 21:10:57 +0000949 except error.JobComplete:
950 sys.exit(1)
apwb832e1b2007-11-24 20:24:38 +0000951
jadmanski0afbb632008-06-06 21:10:57 +0000952 except error.JobError, instance:
953 print "JOB ERROR: " + instance.args[0]
954 if myjob:
955 command = None
956 if len(instance.args) > 1:
957 command = instance.args[1]
958 myjob.record('ABORT', None, command, instance.args[0])
959 myjob._decrement_group_level()
960 myjob.record('END ABORT', None, None)
961 assert(myjob.group_level == 0)
962 myjob.complete(1)
963 else:
964 sys.exit(1)
apwce9abe92006-04-27 14:14:04 +0000965
jadmanski0afbb632008-06-06 21:10:57 +0000966 except Exception, e:
967 msg = str(e) + '\n' + traceback.format_exc()
968 print "JOB ERROR: " + msg
969 if myjob:
970 myjob.record('ABORT', None, None, msg)
971 myjob._decrement_group_level()
972 myjob.record('END ABORT', None, None)
973 assert(myjob.group_level == 0)
974 myjob.complete(1)
975 else:
976 sys.exit(1)
mbligh892d37f2007-03-01 17:03:25 +0000977
jadmanski0afbb632008-06-06 21:10:57 +0000978 # If we get here, then we assume the job is complete and good.
979 myjob._decrement_group_level()
980 myjob.record('END GOOD', None, None)
981 assert(myjob.group_level == 0)
mbligh0144e5a2008-03-07 18:17:53 +0000982
jadmanski0afbb632008-06-06 21:10:57 +0000983 myjob.complete(0)
mblighcaa62c22008-04-07 21:51:17 +0000984
985
# site_job.py may be non-existent or empty; make sure that an appropriate
# site_job class is created nevertheless, so the 'job' class below always
# has a valid base
try:
    from site_job import site_job
except ImportError:
    # no site-specific customization available: fall back to a no-op
    # subclass of base_job
    class site_job(base_job):
        pass

# the concrete job class used by runjob(): base_job plus any
# site-specific extensions
class job(site_job):
    pass