blob: a02fc77c2c2f13872340a65a8370e35be6c6254d [file] [log] [blame]
mblighc86b0b42006-07-28 17:35:28 +00001"""The main job wrapper
mbligha2508052006-05-28 21:29:53 +00002
mblighc86b0b42006-07-28 17:35:28 +00003This is the core infrastructure.
4"""
5
6__author__ = """Copyright Andy Whitcroft, Martin J. Bligh 2006"""
mbligha2508052006-05-28 21:29:53 +00007
mbligh8f243ec2006-10-10 05:55:49 +00008# standard stuff
mbligh7dd510c2007-11-13 17:11:22 +00009import os, sys, re, pickle, shutil, time, traceback
mbligh8f243ec2006-10-10 05:55:49 +000010# autotest stuff
mblighf4c35322006-03-13 01:01:10 +000011from autotest_utils import *
apw8fef4ac2006-10-10 22:53:37 +000012from parallel import *
mblighf31b0c02007-11-29 18:19:22 +000013from common.error import *
mbligh65938a22007-12-10 16:58:52 +000014from common import barrier
mblighe1417fa2007-12-10 16:55:13 +000015import kernel, xen, test, profilers, filesystem, fd_stack, boottool
apw059e1b12006-10-12 17:18:26 +000016import harness, config
mbligh83ac9942007-11-05 18:59:37 +000017import sysinfo
mbligh65938a22007-12-10 16:58:52 +000018import cpuset
mblighf4c35322006-03-13 01:01:10 +000019
20class job:
mblighc86b0b42006-07-28 17:35:28 +000021 """The actual job against which we do everything.
22
23 Properties:
mbligh72b88fc2006-12-16 18:41:35 +000024 autodir
mblighc86b0b42006-07-28 17:35:28 +000025 The top level autotest directory (/usr/local/autotest).
26 Comes from os.environ['AUTODIR'].
mbligh72b88fc2006-12-16 18:41:35 +000027 bindir
mblighc86b0b42006-07-28 17:35:28 +000028 <autodir>/bin/
mblighd5a38832008-01-25 18:15:39 +000029 libdir
30 <autodir>/lib/
mbligh72b88fc2006-12-16 18:41:35 +000031 testdir
mblighc86b0b42006-07-28 17:35:28 +000032 <autodir>/tests/
mbligh84bafdb2008-01-26 19:43:34 +000033 site_testdir
34 <autodir>/site_tests/
mblighc86b0b42006-07-28 17:35:28 +000035 profdir
36 <autodir>/profilers/
37 tmpdir
38 <autodir>/tmp/
39 resultdir
40 <autodir>/results/<jobtag>
41 stdout
42 fd_stack object for stdout
43 stderr
44 fd_stack object for stderr
45 profilers
46 the profilers object for this job
apw504a7dd2006-10-12 17:18:37 +000047 harness
48 the server harness object for this job
apw059e1b12006-10-12 17:18:26 +000049 config
50 the job configuration for this job
mblighc86b0b42006-07-28 17:35:28 +000051 """
52
mblighd528d302007-12-19 16:19:05 +000053 DEFAULT_LOG_FILENAME = "status"
54
mbligh362ab3d2007-08-30 11:24:04 +000055 def __init__(self, control, jobtag, cont, harness_type=None):
mblighc86b0b42006-07-28 17:35:28 +000056 """
57 control
58 The control file (pathname of)
59 jobtag
60 The job tag string (eg "default")
apw96da1a42006-11-02 00:23:18 +000061 cont
62 If this is the continuation of this job
apwe68a7132006-12-01 11:21:37 +000063 harness_type
64 An alternative server harness
mblighc86b0b42006-07-28 17:35:28 +000065 """
mblighf4c35322006-03-13 01:01:10 +000066 self.autodir = os.environ['AUTODIR']
apw870988b2007-09-25 16:50:53 +000067 self.bindir = os.path.join(self.autodir, 'bin')
mblighd5a38832008-01-25 18:15:39 +000068 self.libdir = os.path.join(self.autodir, 'lib')
apw870988b2007-09-25 16:50:53 +000069 self.testdir = os.path.join(self.autodir, 'tests')
mbligh84bafdb2008-01-26 19:43:34 +000070 self.site_testdir = os.path.join(self.autodir, 'site_tests')
apw870988b2007-09-25 16:50:53 +000071 self.profdir = os.path.join(self.autodir, 'profilers')
72 self.tmpdir = os.path.join(self.autodir, 'tmp')
73 self.resultdir = os.path.join(self.autodir, 'results', jobtag)
mbligh0fb83972008-01-10 16:30:02 +000074 self.sysinfodir = os.path.join(self.resultdir, 'sysinfo')
mbligh8d83cdc2007-12-03 18:09:18 +000075 self.control = os.path.abspath(control)
mbligha2508052006-05-28 21:29:53 +000076
apw96da1a42006-11-02 00:23:18 +000077 if not cont:
mblighbf79bba2008-03-03 16:02:37 +000078 df_root = system_output('df -m / | tail -1').split()
79 self.free_space_mb_root_before = int(df_root[3])
80 self.usage_percent_root_before = int(df_root[4].rstrip('%'))
81 if (self.free_space_mb_root_before < 100 or
82 self.usage_percent_root_before > 90):
83 self.record('WARN', 'check free space on root', 'free space is less than 100Mb or 10%')
mblighf4ca14f2008-03-03 16:03:05 +000084 pickle.dump(self.free_space_mb_root_before, file(self.control + '.fs', 'w'))
mblighbf79bba2008-03-03 16:02:37 +000085
apw96da1a42006-11-02 00:23:18 +000086 if os.path.exists(self.tmpdir):
mbligh09f288a2007-09-18 21:34:57 +000087 system('umount -f %s > /dev/null 2> /dev/null'%\
88 self.tmpdir, ignorestatus=True)
apw96da1a42006-11-02 00:23:18 +000089 system('rm -rf ' + self.tmpdir)
90 os.mkdir(self.tmpdir)
91
apw870988b2007-09-25 16:50:53 +000092 results = os.path.join(self.autodir, 'results')
93 if not os.path.exists(results):
94 os.mkdir(results)
mblighfbfb77d2007-02-15 18:54:03 +000095
apwf3d28622007-09-25 16:49:17 +000096 download = os.path.join(self.testdir, 'download')
97 if os.path.exists(download):
98 system('rm -rf ' + download)
99 os.mkdir(download)
100
apw96da1a42006-11-02 00:23:18 +0000101 if os.path.exists(self.resultdir):
102 system('rm -rf ' + self.resultdir)
103 os.mkdir(self.resultdir)
mbligh0fb83972008-01-10 16:30:02 +0000104 os.mkdir(self.sysinfodir)
apw96da1a42006-11-02 00:23:18 +0000105
apw870988b2007-09-25 16:50:53 +0000106 os.mkdir(os.path.join(self.resultdir, 'debug'))
107 os.mkdir(os.path.join(self.resultdir, 'analysis'))
apw870988b2007-09-25 16:50:53 +0000108
mbligh8d83cdc2007-12-03 18:09:18 +0000109 shutil.copyfile(self.control,
110 os.path.join(self.resultdir, 'control'))
mblighf4ca14f2008-03-03 16:03:05 +0000111 else:
112 self.free_space_mb_root_before = pickle.load(file(self.control + '.fs', 'r'))
113
mbligh4b089662006-06-14 22:34:58 +0000114
apwecf41b72006-03-31 14:00:55 +0000115 self.control = control
mbligh27113602007-10-31 21:07:51 +0000116 self.jobtag = jobtag
mblighd528d302007-12-19 16:19:05 +0000117 self.log_filename = self.DEFAULT_LOG_FILENAME
mbligh68119582008-01-25 18:16:41 +0000118 self.container = None
mblighf4c35322006-03-13 01:01:10 +0000119
mbligh56f1fbb2006-10-01 15:10:56 +0000120 self.stdout = fd_stack.fd_stack(1, sys.stdout)
121 self.stderr = fd_stack.fd_stack(2, sys.stderr)
mbligh7dd510c2007-11-13 17:11:22 +0000122 self.group_level = 0
mblighf4c35322006-03-13 01:01:10 +0000123
apw059e1b12006-10-12 17:18:26 +0000124 self.config = config.config(self)
125
apwd27e55f2006-12-01 11:22:08 +0000126 self.harness = harness.select(harness_type, self)
127
mbligha35553b2006-04-23 15:52:25 +0000128 self.profilers = profilers.profilers(self)
mbligh72905562006-05-25 01:30:49 +0000129
mblighcaa605c2006-10-02 00:37:35 +0000130 try:
apw90154af2006-12-01 11:23:36 +0000131 tool = self.config_get('boottool.executable')
132 self.bootloader = boottool.boottool(tool)
mblighcaa605c2006-10-02 00:37:35 +0000133 except:
134 pass
135
mbligh0fb83972008-01-10 16:30:02 +0000136 sysinfo.log_per_reboot_data(self.sysinfodir)
mbligh3a6d6ca2006-04-23 15:50:24 +0000137
mbligh30270302007-11-05 20:33:52 +0000138 if not cont:
mblighc3430162007-11-14 23:57:19 +0000139 self.record('START', None, None)
mblighc3430162007-11-14 23:57:19 +0000140 self.group_level = 1
apw357f50f2006-12-01 11:22:39 +0000141
apwf91efaf2007-11-24 17:32:13 +0000142 self.harness.run_start()
143
mbligh0692e472007-08-30 16:07:53 +0000144
145 def relative_path(self, path):
146 """\
147 Return a patch relative to the job results directory
148 """
mbligh1c250ca2007-08-30 16:31:38 +0000149 head = len(self.resultdir) + 1 # remove the / inbetween
150 return path[head:]
mbligh0692e472007-08-30 16:07:53 +0000151
152
mbligh362ab3d2007-08-30 11:24:04 +0000153 def control_get(self):
154 return self.control
155
mblighcaa605c2006-10-02 00:37:35 +0000156
mbligh8d83cdc2007-12-03 18:09:18 +0000157 def control_set(self, control):
158 self.control = os.path.abspath(control)
159
160
apwde1503a2006-10-10 08:34:21 +0000161 def harness_select(self, which):
162 self.harness = harness.select(which, self)
163
164
apw059e1b12006-10-12 17:18:26 +0000165 def config_set(self, name, value):
166 self.config.set(name, value)
167
168
169 def config_get(self, name):
170 return self.config.get(name)
171
mbligh8baa2ea2006-12-17 23:01:24 +0000172 def setup_dirs(self, results_dir, tmp_dir):
mbligh1e8858e2006-11-24 22:18:35 +0000173 if not tmp_dir:
apw870988b2007-09-25 16:50:53 +0000174 tmp_dir = os.path.join(self.tmpdir, 'build')
mbligh1e8858e2006-11-24 22:18:35 +0000175 if not os.path.exists(tmp_dir):
176 os.mkdir(tmp_dir)
177 if not os.path.isdir(tmp_dir):
mbligh642b03e2008-01-14 16:53:15 +0000178 e_msg = "Temp dir (%s) is not a dir - args backwards?" % self.tmpdir
179 raise ValueError(e_msg)
mbligh1e8858e2006-11-24 22:18:35 +0000180
181 # We label the first build "build" and then subsequent ones
182 # as "build.2", "build.3", etc. Whilst this is a little bit
183 # inconsistent, 99.9% of jobs will only have one build
184 # (that's not done as kernbench, sparse, or buildtest),
185 # so it works out much cleaner. One of life's comprimises.
186 if not results_dir:
187 results_dir = os.path.join(self.resultdir, 'build')
188 i = 2
189 while os.path.exists(results_dir):
190 results_dir = os.path.join(self.resultdir, 'build.%d' % i)
mblighd9223fc2006-11-26 17:19:54 +0000191 i += 1
mbligh1e8858e2006-11-24 22:18:35 +0000192 if not os.path.exists(results_dir):
193 os.mkdir(results_dir)
mbligh72b88fc2006-12-16 18:41:35 +0000194
mbligh8baa2ea2006-12-17 23:01:24 +0000195 return (results_dir, tmp_dir)
196
197
198 def xen(self, base_tree, results_dir = '', tmp_dir = '', leave = False, \
199 kjob = None ):
200 """Summon a xen object"""
201 (results_dir, tmp_dir) = self.setup_dirs(results_dir, tmp_dir)
202 build_dir = 'xen'
203 return xen.xen(self, base_tree, results_dir, tmp_dir, build_dir, leave, kjob)
204
205
206 def kernel(self, base_tree, results_dir = '', tmp_dir = '', leave = False):
207 """Summon a kernel object"""
mbligh669caa12007-11-05 18:32:13 +0000208 (results_dir, tmp_dir) = self.setup_dirs(results_dir, tmp_dir)
mbligh8baa2ea2006-12-17 23:01:24 +0000209 build_dir = 'linux'
mbligh6ee7ee02007-11-13 23:49:05 +0000210 return kernel.auto_kernel(self, base_tree, results_dir,
211 tmp_dir, build_dir, leave)
mblighf4c35322006-03-13 01:01:10 +0000212
mblighcaa605c2006-10-02 00:37:35 +0000213
mbligh6b504ff2007-12-12 21:03:49 +0000214 def barrier(self, *args, **kwds):
mblighfadca202006-09-23 04:40:01 +0000215 """Create a barrier object"""
mbligh6b504ff2007-12-12 21:03:49 +0000216 return barrier.barrier(*args, **kwds)
mblighfadca202006-09-23 04:40:01 +0000217
mblighcaa605c2006-10-02 00:37:35 +0000218
mbligh4b089662006-06-14 22:34:58 +0000219 def setup_dep(self, deps):
mblighc86b0b42006-07-28 17:35:28 +0000220 """Set up the dependencies for this test.
221
222 deps is a list of libraries required for this test.
223 """
mbligh4b089662006-06-14 22:34:58 +0000224 for dep in deps:
225 try:
apw870988b2007-09-25 16:50:53 +0000226 os.chdir(os.path.join(self.autodir, 'deps', dep))
mbligh4b089662006-06-14 22:34:58 +0000227 system('./' + dep + '.py')
228 except:
229 error = "setting up dependency " + dep + "\n"
mbligh72b88fc2006-12-16 18:41:35 +0000230 raise UnhandledError(error)
mbligh4b089662006-06-14 22:34:58 +0000231
232
mbligh72b88fc2006-12-16 18:41:35 +0000233 def __runtest(self, url, tag, args, dargs):
234 try:
mbligh53c41502007-10-23 20:45:04 +0000235 l = lambda : test.runtest(self, url, tag, args, dargs)
236 pid = fork_start(self.resultdir, l)
237 fork_waitfor(self.resultdir, pid)
mbligh72b88fc2006-12-16 18:41:35 +0000238 except AutotestError:
239 raise
240 except:
241 raise UnhandledError('running test ' + \
242 self.__class__.__name__ + "\n")
apwf1a81162006-04-25 10:10:29 +0000243
mblighcaa605c2006-10-02 00:37:35 +0000244
mblighd016ecc2006-11-25 21:41:07 +0000245 def run_test(self, url, *args, **dargs):
mblighc86b0b42006-07-28 17:35:28 +0000246 """Summon a test object and run it.
247
248 tag
249 tag to add to testname
mbligh12a7df72006-10-06 03:54:33 +0000250 url
251 url of the test to run
mblighc86b0b42006-07-28 17:35:28 +0000252 """
mbligh12a7df72006-10-06 03:54:33 +0000253
mblighd016ecc2006-11-25 21:41:07 +0000254 if not url:
mbligh642b03e2008-01-14 16:53:15 +0000255 raise TypeError("Test name is invalid. Switched arguments?")
mbligh09f288a2007-09-18 21:34:57 +0000256 (group, testname) = test.testname(url)
mbligh7dd510c2007-11-13 17:11:22 +0000257 tag = dargs.pop('tag', None)
mbligh65938a22007-12-10 16:58:52 +0000258 container = dargs.pop('container', None)
mbligh09f288a2007-09-18 21:34:57 +0000259 subdir = testname
mbligh7dd510c2007-11-13 17:11:22 +0000260 if tag:
261 subdir += '.' + tag
262
mbligh65938a22007-12-10 16:58:52 +0000263 if container:
mbligh68119582008-01-25 18:16:41 +0000264 cname = container.get('name', None)
265 if not cname: # get old name
266 cname = container.get('container_name', None)
267 mbytes = container.get('mbytes', None)
268 if not mbytes: # get old name
269 mbytes = container.get('mem', None)
270 cpus = container.get('cpus', None)
271 if not cpus: # get old name
272 cpus = container.get('cpu', None)
273 root = container.get('root', None)
274 self.new_container(mbytes=mbytes, cpus=cpus,
275 root=root, name=cname)
mbligh65938a22007-12-10 16:58:52 +0000276 # We are running in a container now...
277
mbligh7dd510c2007-11-13 17:11:22 +0000278 def group_func():
apwf1a81162006-04-25 10:10:29 +0000279 try:
mblighd016ecc2006-11-25 21:41:07 +0000280 self.__runtest(url, tag, args, dargs)
apwf1a81162006-04-25 10:10:29 +0000281 except Exception, detail:
mbligh7dd510c2007-11-13 17:11:22 +0000282 self.record('FAIL', subdir, testname,
283 str(detail))
apwf1a81162006-04-25 10:10:29 +0000284 raise
285 else:
mbligh7dd510c2007-11-13 17:11:22 +0000286 self.record('GOOD', subdir, testname,
287 'completed successfully')
mblighcfc6dd32007-11-20 00:44:35 +0000288 result, exc_info = self.__rungroup(subdir, group_func)
mbligh68119582008-01-25 18:16:41 +0000289 if container:
290 self.release_container()
mbligh7dd510c2007-11-13 17:11:22 +0000291 if exc_info and isinstance(exc_info[1], TestError):
292 return False
293 elif exc_info:
mbligh71ea2492008-01-15 20:35:52 +0000294 raise exc_info[0], exc_info[1], exc_info[2]
apwf1a81162006-04-25 10:10:29 +0000295 else:
mbligh7dd510c2007-11-13 17:11:22 +0000296 return True
297
298
299 def __rungroup(self, name, function, *args, **dargs):
300 """\
301 name:
302 name of the group
303 function:
304 subroutine to run
305 *args:
306 arguments for the function
307
308 Returns a 2-tuple (result, exc_info) where result
309 is the return value of function, and exc_info is
310 the sys.exc_info() of the exception thrown by the
311 function (which may be None).
312 """
313
314 result, exc_info = None, None
315 try:
316 self.record('START', None, name)
317 self.group_level += 1
318 result = function(*args, **dargs)
319 self.group_level -= 1
320 self.record('END GOOD', None, name)
321 except Exception, e:
322 exc_info = sys.exc_info()
323 self.group_level -= 1
mbligh51144e02007-11-20 20:38:18 +0000324 err_msg = str(e) + '\n' + format_error()
325 self.record('END FAIL', None, name, err_msg)
mbligh7dd510c2007-11-13 17:11:22 +0000326
327 return result, exc_info
apw0865f482006-03-30 18:50:19 +0000328
mblighd7fb4a62006-10-01 00:57:53 +0000329
apw1da244b2007-09-27 17:18:01 +0000330 def run_group(self, function, *args, **dargs):
mbligh88ab90f2007-08-29 15:52:49 +0000331 """\
332 function:
333 subroutine to run
334 *args:
335 arguments for the function
336 """
337
mbligh7dd510c2007-11-13 17:11:22 +0000338 # Allow the tag for the group to be specified
mbligh88ab90f2007-08-29 15:52:49 +0000339 name = function.__name__
mbligh7dd510c2007-11-13 17:11:22 +0000340 tag = dargs.pop('tag', None)
341 if tag:
342 name = tag
apw1da244b2007-09-27 17:18:01 +0000343
mbligh7dd510c2007-11-13 17:11:22 +0000344 result, exc_info = self.__rungroup(name, function,
345 *args, **dargs)
apw1da244b2007-09-27 17:18:01 +0000346
mbligh7dd510c2007-11-13 17:11:22 +0000347 # if there was a non-TestError exception, raise it
mbligh71ea2492008-01-15 20:35:52 +0000348 if exc_info and not isinstance(exc_info[1], TestError):
mbligh7dd510c2007-11-13 17:11:22 +0000349 err = ''.join(traceback.format_exception(*exc_info))
350 raise TestError(name + ' failed\n' + err)
mbligh88ab90f2007-08-29 15:52:49 +0000351
mbligh7dd510c2007-11-13 17:11:22 +0000352 # pass back the actual return value from the function
apw08403ca2007-09-27 17:17:22 +0000353 return result
354
mbligh88ab90f2007-08-29 15:52:49 +0000355
mbligh68119582008-01-25 18:16:41 +0000356 def new_container(self, mbytes=None, cpus=None, root=None, name=None):
357 if grep('cpusets', '/proc/filesystems'):
358 print "Containers not enabled by latest reboot"
359 return # containers weren't enabled in this kernel boot
360 pid = os.getpid()
361 if not root:
362 root = 'sys'
363 if not name:
364 name = 'test%d' % pid # make arbitrary unique name
365 self.container = cpuset.cpuset(name, job_size=mbytes,
366 job_pid=pid, cpus=cpus, root=root, cleanup=1)
367 # This job's python shell is now running in the new container
368 # and all forked test processes will inherit that container
369
370
371 def release_container(self):
372 if self.container:
373 self.container.release(job_pid=os.getpid())
374 self.container = None
375
376
377 def cpu_count(self):
378 if self.container:
379 return len(self.container.cpus)
380 return count_cpus() # use total system count
381
382
apwce73d892007-09-25 16:53:05 +0000383 # Check the passed kernel identifier against the command line
384 # and the running kernel, abort the job on missmatch.
mblighda0311e2007-10-25 16:03:33 +0000385 def kernel_check_ident(self, expected_when, expected_id, expected_cl, subdir, type = 'src'):
386 print "POST BOOT: checking booted kernel mark=%d identity='%s' changelist=%s type='%s'" \
387 % (expected_when, expected_id, expected_cl, type)
apwce73d892007-09-25 16:53:05 +0000388
389 running_id = running_os_ident()
390
391 cmdline = read_one_line("/proc/cmdline")
392
393 find_sum = re.compile(r'.*IDENT=(\d+)')
394 m = find_sum.match(cmdline)
395 cmdline_when = -1
396 if m:
397 cmdline_when = int(m.groups()[0])
398
mblighda0311e2007-10-25 16:03:33 +0000399 cl_re = re.compile(r'\d{7,}')
400 cl_match = cl_re.search(system_output('uname -v').split()[1])
401 if cl_match:
402 current_cl = cl_match.group()
403 else:
404 current_cl = None
405
apwce73d892007-09-25 16:53:05 +0000406 # We have all the facts, see if they indicate we
407 # booted the requested kernel or not.
408 bad = False
mblighda0311e2007-10-25 16:03:33 +0000409 if (type == 'src' and expected_id != running_id or
410 type == 'rpm' and not running_id.startswith(expected_id + '::')):
apwce73d892007-09-25 16:53:05 +0000411 print "check_kernel_ident: kernel identifier mismatch"
412 bad = True
413 if expected_when != cmdline_when:
414 print "check_kernel_ident: kernel command line mismatch"
415 bad = True
mblighda0311e2007-10-25 16:03:33 +0000416 if expected_cl and current_cl and str(expected_cl) != current_cl:
417 print 'check_kernel_ident: kernel changelist mismatch'
418 bad = True
apwce73d892007-09-25 16:53:05 +0000419
420 if bad:
421 print " Expected Ident: " + expected_id
422 print " Running Ident: " + running_id
423 print " Expected Mark: %d" % (expected_when)
424 print "Command Line Mark: %d" % (cmdline_when)
mblighda0311e2007-10-25 16:03:33 +0000425 print " Expected P4 CL: %s" % expected_cl
426 print " P4 CL: %s" % current_cl
apwce73d892007-09-25 16:53:05 +0000427 print " Command Line: " + cmdline
428
mbligh30270302007-11-05 20:33:52 +0000429 raise JobError("boot failure", "reboot.verify")
apwce73d892007-09-25 16:53:05 +0000430
mbligh30270302007-11-05 20:33:52 +0000431 self.record('GOOD', subdir, 'reboot.verify')
apwce73d892007-09-25 16:53:05 +0000432
433
mblighc2359852007-08-28 18:11:48 +0000434 def filesystem(self, device, mountpoint = None, loop_size = 0):
mblighd7fb4a62006-10-01 00:57:53 +0000435 if not mountpoint:
436 mountpoint = self.tmpdir
mblighc2359852007-08-28 18:11:48 +0000437 return filesystem.filesystem(self, device, mountpoint,loop_size)
mblighd7fb4a62006-10-01 00:57:53 +0000438
mblighcaa605c2006-10-02 00:37:35 +0000439
440 def reboot(self, tag='autotest'):
mbligh30270302007-11-05 20:33:52 +0000441 self.record('GOOD', None, 'reboot.start')
apwde1503a2006-10-10 08:34:21 +0000442 self.harness.run_reboot()
apw11985b72007-10-04 15:44:47 +0000443 default = self.config_get('boot.set_default')
444 if default:
445 self.bootloader.set_default(tag)
446 else:
447 self.bootloader.boot_once(tag)
mblighf3b78932007-11-07 16:52:47 +0000448 system("(sleep 5; reboot) </dev/null >/dev/null 2>&1 &")
apw0778a2f2006-10-06 03:11:40 +0000449 self.quit()
mblighcaa605c2006-10-02 00:37:35 +0000450
451
apw0865f482006-03-30 18:50:19 +0000452 def noop(self, text):
453 print "job: noop: " + text
454
mblighcaa605c2006-10-02 00:37:35 +0000455
mblighc86b0b42006-07-28 17:35:28 +0000456 def parallel(self, *tasklist):
457 """Run tasks in parallel"""
apw8fef4ac2006-10-10 22:53:37 +0000458
459 pids = []
mblighd528d302007-12-19 16:19:05 +0000460 old_log_filename = self.log_filename
461 for i, task in enumerate(tasklist):
462 self.log_filename = old_log_filename + (".%d" % i)
463 task_func = lambda: task[0](*task[1:])
464 pids.append(fork_start(self.resultdir, task_func))
465
466 old_log_path = os.path.join(self.resultdir, old_log_filename)
467 old_log = open(old_log_path, "a")
mblighd509b712008-01-14 17:41:25 +0000468 exceptions = []
mblighd528d302007-12-19 16:19:05 +0000469 for i, pid in enumerate(pids):
470 # wait for the task to finish
mblighd509b712008-01-14 17:41:25 +0000471 try:
472 fork_waitfor(self.resultdir, pid)
473 except Exception, e:
474 exceptions.append(e)
mblighd528d302007-12-19 16:19:05 +0000475 # copy the logs from the subtask into the main log
476 new_log_path = old_log_path + (".%d" % i)
477 if os.path.exists(new_log_path):
478 new_log = open(new_log_path)
479 old_log.write(new_log.read())
480 new_log.close()
481 old_log.flush()
482 os.remove(new_log_path)
483 old_log.close()
484
485 self.log_filename = old_log_filename
apw0865f482006-03-30 18:50:19 +0000486
mblighd509b712008-01-14 17:41:25 +0000487 # handle any exceptions raised by the parallel tasks
488 if exceptions:
489 msg = "%d task(s) failed" % len(exceptions)
490 raise JobError(msg, str(exceptions), exceptions)
491
mblighcaa605c2006-10-02 00:37:35 +0000492
apw0865f482006-03-30 18:50:19 +0000493 def quit(self):
mblighc86b0b42006-07-28 17:35:28 +0000494 # XXX: should have a better name.
apwde1503a2006-10-10 08:34:21 +0000495 self.harness.run_pause()
apwf2c66602006-04-27 14:11:25 +0000496 raise JobContinue("more to come")
apw0865f482006-03-30 18:50:19 +0000497
mblighcaa605c2006-10-02 00:37:35 +0000498
apw0865f482006-03-30 18:50:19 +0000499 def complete(self, status):
mblighc86b0b42006-07-28 17:35:28 +0000500 """Clean up and exit"""
apw0865f482006-03-30 18:50:19 +0000501 # We are about to exit 'complete' so clean up the control file.
502 try:
apwecf41b72006-03-31 14:00:55 +0000503 os.unlink(self.control + '.state')
apw0865f482006-03-30 18:50:19 +0000504 except:
505 pass
mblighc0b10d32008-03-03 16:03:28 +0000506 try:
507 os.unlink(self.control + '.fs')
508 except:
509 pass
510
mbligh61a6c1a2006-12-25 01:26:38 +0000511 self.harness.run_complete()
apw1b021902006-04-03 17:02:56 +0000512 sys.exit(status)
apw0865f482006-03-30 18:50:19 +0000513
mblighcaa605c2006-10-02 00:37:35 +0000514
apw0865f482006-03-30 18:50:19 +0000515 steps = []
516 def next_step(self, step):
mblighc86b0b42006-07-28 17:35:28 +0000517 """Define the next step"""
apwce73d892007-09-25 16:53:05 +0000518 if not isinstance(step[0], basestring):
519 step[0] = step[0].__name__
apw0865f482006-03-30 18:50:19 +0000520 self.steps.append(step)
apwecf41b72006-03-31 14:00:55 +0000521 pickle.dump(self.steps, open(self.control + '.state', 'w'))
apw0865f482006-03-30 18:50:19 +0000522
mblighcaa605c2006-10-02 00:37:35 +0000523
mbligh237bed32007-09-05 13:05:57 +0000524 def next_step_prepend(self, step):
525 """Insert a new step, executing first"""
apwce73d892007-09-25 16:53:05 +0000526 if not isinstance(step[0], basestring):
527 step[0] = step[0].__name__
mbligh237bed32007-09-05 13:05:57 +0000528 self.steps.insert(0, step)
529 pickle.dump(self.steps, open(self.control + '.state', 'w'))
530
531
apw83f8d772006-04-27 14:12:56 +0000532 def step_engine(self):
mblighc86b0b42006-07-28 17:35:28 +0000533 """the stepping engine -- if the control file defines
534 step_init we will be using this engine to drive multiple runs.
535 """
536 """Do the next step"""
apw83f8d772006-04-27 14:12:56 +0000537 lcl = dict({'job': self})
538
539 str = """
mblighf31b0c02007-11-29 18:19:22 +0000540from common.error import *
apw83f8d772006-04-27 14:12:56 +0000541from autotest_utils import *
542"""
543 exec(str, lcl, lcl)
544 execfile(self.control, lcl, lcl)
545
mblighd9223fc2006-11-26 17:19:54 +0000546 state = self.control + '.state'
apw0865f482006-03-30 18:50:19 +0000547 # If there is a mid-job state file load that in and continue
548 # where it indicates. Otherwise start stepping at the passed
549 # entry.
550 try:
mblighd9223fc2006-11-26 17:19:54 +0000551 self.steps = pickle.load(open(state, 'r'))
apw0865f482006-03-30 18:50:19 +0000552 except:
apw83f8d772006-04-27 14:12:56 +0000553 if lcl.has_key('step_init'):
554 self.next_step([lcl['step_init']])
apw0865f482006-03-30 18:50:19 +0000555
556 # Run the step list.
557 while len(self.steps) > 0:
apwfd922bb2006-04-04 07:47:00 +0000558 step = self.steps.pop(0)
mblighd9223fc2006-11-26 17:19:54 +0000559 pickle.dump(self.steps, open(state, 'w'))
apw0865f482006-03-30 18:50:19 +0000560
561 cmd = step.pop(0)
apw83f8d772006-04-27 14:12:56 +0000562 lcl['__args'] = step
apwce73d892007-09-25 16:53:05 +0000563 exec(cmd + "(*__args)", lcl, lcl)
apw0865f482006-03-30 18:50:19 +0000564
mblighcaa605c2006-10-02 00:37:35 +0000565
mbligh09f288a2007-09-18 21:34:57 +0000566 def record(self, status_code, subdir, operation, status = ''):
567 """
568 Record job-level status
apw7db8d0b2006-10-09 08:10:25 +0000569
mbligh09f288a2007-09-18 21:34:57 +0000570 The intent is to make this file both machine parseable and
571 human readable. That involves a little more complexity, but
572 really isn't all that bad ;-)
573
574 Format is <status code>\t<subdir>\t<operation>\t<status>
575
576 status code: (GOOD|WARN|FAIL|ABORT)
577 or START
578 or END (GOOD|WARN|FAIL|ABORT)
579
580 subdir: MUST be a relevant subdirectory in the results,
581 or None, which will be represented as '----'
582
583 operation: description of what you ran (e.g. "dbench", or
584 "mkfs -t foobar /dev/sda9")
585
586 status: error message or "completed sucessfully"
587
588 ------------------------------------------------------------
589
590 Initial tabs indicate indent levels for grouping, and is
mbligh7dd510c2007-11-13 17:11:22 +0000591 governed by self.group_level
mbligh09f288a2007-09-18 21:34:57 +0000592
593 multiline messages have secondary lines prefaced by a double
594 space (' ')
595 """
596
mblighb0570ad2007-09-19 18:18:11 +0000597 if subdir:
598 if re.match(r'[\n\t]', subdir):
mbligh642b03e2008-01-14 16:53:15 +0000599 raise ValueError("Invalid character in subdir string")
mblighb0570ad2007-09-19 18:18:11 +0000600 substr = subdir
601 else:
602 substr = '----'
mbligh09f288a2007-09-18 21:34:57 +0000603
604 if not re.match(r'(START|(END )?(GOOD|WARN|FAIL|ABORT))$', \
605 status_code):
mbligh642b03e2008-01-14 16:53:15 +0000606 raise ValueError("Invalid status code supplied: %s" % status_code)
mbligh9c5ac322007-10-31 18:01:59 +0000607 if not operation:
608 operation = '----'
mbligh09f288a2007-09-18 21:34:57 +0000609 if re.match(r'[\n\t]', operation):
mbligh642b03e2008-01-14 16:53:15 +0000610 raise ValueError("Invalid character in operation string")
mbligh09f288a2007-09-18 21:34:57 +0000611 operation = operation.rstrip()
612 status = status.rstrip()
613 status = re.sub(r"\t", " ", status)
apw7db8d0b2006-10-09 08:10:25 +0000614 # Ensure any continuation lines are marked so we can
615 # detect them in the status file to ensure it is parsable.
mbligh7dd510c2007-11-13 17:11:22 +0000616 status = re.sub(r"\n", "\n" + "\t" * self.group_level + " ", status)
mbligh09f288a2007-09-18 21:34:57 +0000617
mbligh30270302007-11-05 20:33:52 +0000618 # Generate timestamps for inclusion in the logs
619 epoch_time = int(time.time()) # seconds since epoch, in UTC
620 local_time = time.localtime(epoch_time)
621 epoch_time_str = "timestamp=%d" % (epoch_time,)
622 local_time_str = time.strftime("localtime=%b %d %H:%M:%S",
623 local_time)
624
625 msg = '\t'.join(str(x) for x in (status_code, substr, operation,
626 epoch_time_str, local_time_str,
627 status))
mbligh7dd510c2007-11-13 17:11:22 +0000628 msg = '\t' * self.group_level + msg
apw7db8d0b2006-10-09 08:10:25 +0000629
mblighd528d302007-12-19 16:19:05 +0000630 msg_tag = ""
631 if "." in self.log_filename:
632 msg_tag = self.log_filename.split(".", 1)[1]
633
634 self.harness.test_status_detail(status_code, substr, operation,
635 status, msg_tag)
636 self.harness.test_status(msg, msg_tag)
637
638 # log to stdout (if enabled)
639 #if self.log_filename == self.DEFAULT_LOG_FILENAME:
apwf1a81162006-04-25 10:10:29 +0000640 print msg
mblighd528d302007-12-19 16:19:05 +0000641
642 # log to the "root" status log
643 status_file = os.path.join(self.resultdir, self.log_filename)
mbligh7dd510c2007-11-13 17:11:22 +0000644 open(status_file, "a").write(msg + "\n")
mblighd528d302007-12-19 16:19:05 +0000645
646 # log to the subdir status log (if subdir is set)
mblighb0570ad2007-09-19 18:18:11 +0000647 if subdir:
mblighadff6ca2008-01-22 16:38:25 +0000648 dir = os.path.join(self.resultdir, subdir)
649 if not os.path.exists(dir):
650 os.mkdir(dir)
651
652 status_file = os.path.join(dir,
mblighd528d302007-12-19 16:19:05 +0000653 self.DEFAULT_LOG_FILENAME)
mblighb0570ad2007-09-19 18:18:11 +0000654 open(status_file, "a").write(msg + "\n")
apwce9abe92006-04-27 14:14:04 +0000655
656
def runjob(control, cont = False, tag = "default", harness_type = ''):
    """The main interface to this module

    control
            The control file to use for this job.
    cont
            Whether this is the continuation of a previously started job
    tag
            The job tag, passed to job() as jobtag
    harness_type
            The harness to use, passed to job()

    This function does not return: the process exits with 5 when the
    job raised JobContinue, with 1 on JobComplete or any error, and
    through myjob.complete(0) on success.
    """
    control = os.path.abspath(control)
    state = control + '.state'

    # instantiate the job object ready for the control file.
    myjob = None
    try:
        # Check that the control file is valid
        if not os.path.exists(control):
            raise JobError(control + ": control file not found")

        # When continuing, the job is complete when there is no
        # state file, ensure we don't try and continue.
        if cont and not os.path.exists(state):
            raise JobComplete("all done")
        # Same truth test as job.__init__, so a job it treats as
        # fresh never picks up steps from a stale state file.
        if not cont and os.path.exists(state):
            os.unlink(state)

        myjob = job(control, tag, cont, harness_type)

        # Load in the users control file, may do any one of:
        #  1) execute in toto
        #  2) define steps, and select the first via next_step()
        myjob.step_engine()

    except JobContinue:
        sys.exit(5)

    except JobComplete:
        sys.exit(1)

    except JobError:
        instance = sys.exc_info()[1]
        print("JOB ERROR: " + instance.args[0])
        if myjob:
            command = None
            if len(instance.args) > 1:
                command = instance.args[1]
            myjob.group_level = 0
            myjob.record('ABORT', None, command, instance.args[0])
            myjob.record('END ABORT', None, None)
            myjob.complete(1)
        else:
            sys.exit(1)

    except Exception:
        e = sys.exc_info()[1]
        msg = str(e) + '\n' + format_error()
        print("JOB ERROR: " + msg)
        if myjob:
            myjob.group_level = 0
            myjob.record('ABORT', None, None, msg)
            myjob.record('END ABORT', None, None)
            myjob.complete(1)
        else:
            sys.exit(1)

    # If we get here, then we assume the job is complete and good.
    myjob.group_level = 0
    myjob.record('END GOOD', None, None)
    df_root = system_output('df -m / | tail -1').split()
    free_space_mb_root_after = int(df_root[3])
    if myjob.free_space_mb_root_before - free_space_mb_root_after > 5:
        # subdir is None: this warning belongs to no results
        # subdirectory, the text goes in operation and status
        myjob.record('WARN', None, 'Check disk usage',
                     'disk usage on root is greater than 5Mb')
    myjob.complete(0)
mbligh68119582008-01-25 18:16:41 +0000727