| #!/usr/bin/python -u |
| # monitor_queue <spool_directory> <resultsdir> [<conmux_server>] |
| import os, time, sys |
| from subprocess import * |
| import tempfile |
| |
# Command line: monitor_queue <spool_directory> <resultsdir> [<conmux_server>]
if len(sys.argv) < 3:
    print("Usage: monitor_queue <spool_directory> <resultsdir> [<conmux_server>]")
    sys.exit(1)
(spooldir, resultsdir) = [os.path.abspath(p) for p in sys.argv[1:3]]

# Validate both directories before anything is read from them, so a missing
# spool directory is reported as such rather than as a missing .machines file.
if not os.path.exists(spooldir):
    print("spooldir %s does not exist" % spooldir)
    sys.exit(1)
if not os.path.exists(resultsdir):
    print("resultsdir %s does not exist" % resultsdir)
    sys.exit(1)

# The optional conmux server is the third positional argument, as stated in
# the usage message.  None means no server was given on the command line.
if len(sys.argv) > 3:
    console = sys.argv[3]
else:
    console = None

# The queue is named after the spool directory.  The machines it serves are
# listed one per line in <spooldir>/.machines (blank lines are ignored); when
# that file is absent the queue name itself is used as the only machine.
queue_name = os.path.basename(spooldir)
dotmachines = os.path.join(spooldir, '.machines')
if os.path.exists(dotmachines):
    machines_file = open(dotmachines)
    try:
        machines = [line.strip() for line in machines_file if line.strip()]
    finally:
        machines_file.close()
else:
    print("No .machines file in %s, assuming queue name is a machine"
          % queue_name)
    machines = [queue_name]
| |
| |
##### Control file templates #####
# Each template is the text of an autoserv control file.  It is filled in by
# __create_autoserv_wrapper() with three repr()'d values, in this order:
#   1. the conmux server (or None),
#   2. the path of the Autotest control file to run,
#   3. the results directory.
# Names such as hosts, autotest, machines, parallel, subcommand and os are
# not defined by the templates themselves; presumably autoserv supplies them
# when it executes the generated file -- confirm against autoserv.

# Used when the queue has more than one machine: install and run on every
# machine in parallel, with one results sub-directory per hostname.
SERV_MULTI = """# monitor_queue generated autoserv file (SERV_MULTI template)
hosts = [hosts.ConmuxSSHHost(hostname, server=%s)
         for hostname in machines]

at = autotest.Autotest()

control_path = %s
results = %s

def install_run(host):
    at.install(host)
    host_results = os.path.join(results, host.hostname)
    at.run(control_path, host_results, host)

parallel([subcommand(install_run, [host]) for host in hosts])"""


# Used when the queue has a single machine: install and run on machines[0].
SERV_SINGLE = """# monitor_queue generated autoserv file (SERV_SINGLE template)
host = hosts.ConmuxSSHHost(machines[0], server=%s)

at = autotest.Autotest()

control_path = %s
results = %s

at.install(host)
at.run(control_path, results, host)"""

##### End control file templates #####
| |
def pick_job(jobs):
    """Pick the next job to run.

    Currently we just pick the oldest job (the one with the smallest
    modification time), so the queue is served first-in, first-out.
    This would be the place to put prioritizations.

    Args:
        jobs: A list of job file paths.  Relative paths are resolved
            against the current working directory by os.stat().

    Return:
        The path of the job with the oldest mtime, or None if jobs is
        empty.

    Raises:
        OSError: if a job file cannot be stat()ed, e.g. because it was
            removed after being listed.
    """
    if not jobs:
        return None
    # Ascending mtime: the first entry is the oldest.  The sort is stable,
    # so equally old jobs keep the order they were listed in.
    return sorted(jobs, key=lambda job: os.stat(job).st_mtime)[0]
| |
| |
def __create_autoserv_wrapper(template, control_path, results):
    """Create an autoserv file that runs an autotest file at
    control_path on clients and outputs the results in results.

    Args:
        template: A control file template with three %s slots, filled
            in order with the repr() of the module-level console value,
            control_path and results.
        control_path: Path of the Autotest control file to wrap.
        results: Path of the results directory.

    Return:
        The path of a newly created temporary file holding the
        generated autoserv control file.  The caller is responsible
        for deleting it.
    """
    # Format first, so a template/argument mismatch cannot leave an empty
    # temporary file behind.
    text = template % tuple([repr(s) for s in (console,
                                               control_path,
                                               results)])

    tmpfd, tmpname = tempfile.mkstemp()
    tmp = os.fdopen(tmpfd, 'w')
    try:
        tmp.write(text + '\n')
    finally:
        # Close explicitly: the file must be completely on disk before
        # autoserv is started on it, and the descriptor must not leak.
        tmp.close()
    return tmpname
| |
| |
def run_job(control, queuename, scheduler_dir):
    """Runs a control file from the spooldir.

    Args:
        control: The name of a control file inside spooldir.  It is
            assumed to be an Autotest control file in which case it
            will automatically be wrapped with autoserv control
            commands and run with autoserv.  If the file name ends
            with .srv the wrapping procedure will be skipped and the
            autoserv file will be run directly.
        queuename: Name of the queue; used as the prefix of the
            results directory name (<queuename>-<control>).
        scheduler_dir: Directory of the scheduler; autoserv is run
            from ../server/autoserv relative to it.

    Return:
        The return code from the autoserv process: its exit status, or
        a negative signal number if it was killed by a signal.
    """
    # Make sure all the output directories are all setup
    results = os.path.join(resultsdir, queuename + '-' + control)
    if os.path.exists(results):
        # Never reuse an existing results directory; make the name unique
        # by appending the current time.
        previous = results
        results = "%s.%d" % (results, int(time.time()))
        print("Resultsdir %s already present,  changing to %s"
              % (previous, results))
    os.mkdir(results)
    debug = os.path.join(results, 'debug')
    os.mkdir(debug)

    # If this is an autoserv file then don't create the wrapper control
    is_autoserv_ctl = control.endswith('.srv')
    control_path = os.path.abspath(os.path.join(spooldir, control))
    # Otherwise create a tmp autoserv file just to launch the AT ctl file
    if not is_autoserv_ctl:
        if len(machines) > 1:
            # Run autotest file on *all* machines in *parallel*
            template = SERV_MULTI
        else:
            # Run autotest file on *one* machine
            template = SERV_SINGLE
        control_path = __create_autoserv_wrapper(template,
                                                 control_path,
                                                 results)

    try:
        # Now run the job
        autoserv_exe = os.path.abspath(
            os.path.join(scheduler_dir, '..', 'server', 'autoserv'))

        # The command is executed as an argument list, without a shell, so
        # control file names containing spaces or shell metacharacters are
        # passed through literally.  The joined form is only for logging.
        autoserv_argv = [autoserv_exe, '-m', ','.join(machines),
                         control_path]
        autoserv_cmd = ' '.join(autoserv_argv)

        print("Starting job: %s" % control)
        print(autoserv_cmd)

        cmd_log = open(os.path.join(debug, 'autoserv.cmd'), 'w')
        try:
            cmd_log.write(autoserv_cmd + '\n')
        finally:
            cmd_log.close()

        autoserv_stdout = open(os.path.join(debug, 'autoserv.stdout'), 'w')
        try:
            autoserv_stderr = open(os.path.join(debug, 'autoserv.stderr'),
                                   'w')
            try:
                p = Popen(autoserv_argv, stdout=autoserv_stdout,
                          stderr=autoserv_stderr, cwd=results)
                # wait() yields the decoded return code rather than the
                # raw 16-bit status os.waitpid() would report.
                ret = p.wait()
            finally:
                autoserv_stderr.close()
        finally:
            autoserv_stdout.close()
    finally:
        # If this was a tempfile then clean it up, even when launching
        # autoserv failed.
        if not is_autoserv_ctl:
            os.unlink(control_path)

    print("Completed job: %s (%d) " % (control, ret))

    return ret
| |
| |
# Directory this script lives in; run_job() locates autoserv relative to it.
scheduler_dir = os.path.dirname(os.path.abspath(sys.argv[0]))
# Job names are used as relative paths below, so work from inside the spool.
os.chdir(spooldir)
print("monitoring spool directory: " + spooldir)
while True:
    # Every entry of the spool directory that is not a dot-file is a job.
    jobs = []
    for entry in os.listdir(spooldir):
        if not entry.startswith('.'):
            jobs.append(entry)
    next_job = pick_job(jobs)
    if next_job:
        # Run the job to completion, then drop it from the spool.
        ret = run_job(next_job, os.path.basename(spooldir), scheduler_dir)
        os.remove(next_job)
    else:
        # Nothing queued: look again in 10 seconds.
        time.sleep(10)