mbligh | dcc0499 | 2007-07-26 19:42:55 +0000 | [diff] [blame] | 1 | #!/usr/bin/python -u |
# monitor_queue <spool_directory> <resultsdir> [<conmux_server>]
| 3 | import os, time, sys |
| 4 | from subprocess import * |
| 5 | import tempfile |
| 6 | |
mbligh | 88d3256 | 2007-09-11 20:18:58 +0000 | [diff] [blame] | 7 | if (len(sys.argv) < 3): |
| 8 | print "Usage: monitor_queue <spool_directory> <resultsdir> [<conmux_server>]" |
mbligh | dcc0499 | 2007-07-26 19:42:55 +0000 | [diff] [blame] | 9 | sys.exit(1) |
mbligh | 88d3256 | 2007-09-11 20:18:58 +0000 | [diff] [blame] | 10 | (spooldir, resultsdir) = [os.path.abspath(p) for p in sys.argv[1:3]] |
| 11 | |
| 12 | queue_name = os.path.basename(spooldir) |
| 13 | dotmachines = os.path.join(spooldir, '.machines') |
| 14 | if os.path.exists(dotmachines): |
| 15 | machines = [l.strip() for l in open(dotmachines).readlines() if len(l.strip())] |
| 16 | else: |
mbligh | 8dcb745 | 2007-09-30 01:29:22 +0000 | [diff] [blame] | 17 | print "No .machines file in %s, assuming queue name is a machine"\ |
| 18 | % queue_name |
mbligh | 88d3256 | 2007-09-11 20:18:58 +0000 | [diff] [blame] | 19 | machines = [queue_name] |
| 20 | |
mbligh | dcc0499 | 2007-07-26 19:42:55 +0000 | [diff] [blame] | 21 | if len(sys.argv) == 5: |
| 22 | console = sys.argv[4] |
| 23 | else: |
| 24 | console = None |
| 25 | if not os.path.exists(spooldir): |
| 26 | print "spooldir %s does not exist" % spooldir |
| 27 | sys.exit(1) |
| 28 | if not os.path.exists(resultsdir): |
| 29 | print "resultsdir %s does not exist" % resultsdir |
| 30 | sys.exit(1) |
| 31 | |
| 32 | |
##### Control file templates #####
# Each template is the text of an autoserv control file with three '%s'
# placeholders that are filled, in this order, with the repr() of:
#   1. the conmux server (or None),
#   2. the path of the autotest control file to run,
#   3. the results directory.
# The names 'hosts', 'autotest', 'machines', 'parallel' and 'subcommand' are
# not defined by the templates themselves -- presumably they are supplied by
# autoserv when it executes the control file (TODO confirm).
#
# The body of install_run must be indented inside the template text,
# otherwise the generated control file is not valid Python.

# Used when the queue has more than one machine: installs and runs the
# control file on every machine, each with its own results subdirectory.
SERV_MULTI = """# monitor_queue generated autoserv file (SERV_MULTI template)
hosts = [hosts.ConmuxSSHHost(hostname, server=%s)
         for hostname in machines]

at = autotest.Autotest()

control_path = %s
results = %s

def install_run(host):
    at.install(host)
    host_results = os.path.join(results, host.hostname)
    at.run(control_path, host_results, host)

parallel([subcommand(install_run, [host]) for host in hosts])"""


# Used when the queue has a single machine: installs and runs the control
# file on machines[0], writing straight into the results directory.
SERV_SINGLE = """# monitor_queue generated autoserv file (SERV_SINGLE template)
host = hosts.ConmuxSSHHost(machines[0], server=%s)

at = autotest.Autotest()

control_path = %s
results = %s

at.install(host)
at.run(control_path, results, host)"""

##### End control file templates #####
| 63 | |
def pick_job(jobs):
    """Pick the next job to run.

    Currently we just pick the oldest job (the one with the smallest
    modification time), so the spool is served first-in, first-out.
    However, this would be the place to put prioritizations.

    Args:
        jobs: A list of paths to job files; each one is os.stat()ed.

    Return:
        The path of the job to run next, or None if jobs is empty.
    """
    if not jobs:
        return None
    # Ascending mtime order puts the oldest job first.  Sorting with
    # reverse=True here would return the *newest* job and let old jobs
    # starve while new ones keep arriving.
    return sorted(jobs, key=lambda job: os.stat(job).st_mtime)[0]
| 70 | |
| 71 | |
def __create_autoserv_wrapper(template, control_path, results):
    """Create an autoserv file that runs an autotest file at
    control_path on clients and outputs the results in results.

    Args:
        template: Control file text with three '%s' placeholders.  They
            are filled, in order, with the repr() of the module-level
            'console' value, control_path and results.
        control_path: Path of the autotest control file to wrap.
        results: Directory the generated control file writes results to.

    Return:
        The path of the newly created temporary file.  The caller is
        responsible for deleting it.
    """
    # Render the text first so a bad template cannot leave a stray,
    # empty temporary file behind.
    wrapper_text = template % tuple([repr(s) for s in (console,
                                                       control_path,
                                                       results)])

    # Create an autoserv control file to run this autotest control file
    tmpfd, tmpname = tempfile.mkstemp()
    tmp = os.fdopen(tmpfd, 'w')
    try:
        tmp.write(wrapper_text + '\n')
    finally:
        # Close explicitly so the content is flushed to disk before
        # autoserv is started on this file.
        tmp.close()
    return tmpname
| 83 | |
| 84 | |
mbligh | 8dcb745 | 2007-09-30 01:29:22 +0000 | [diff] [blame] | 85 | def run_job(control, queuename, scheduler_dir): |
mbligh | dcc0499 | 2007-07-26 19:42:55 +0000 | [diff] [blame] | 86 | """Runs a control file from the spooldir. |
| 87 | Args: |
| 88 | control: A path to a control file. It is assumed to be an |
| 89 | Autotest control file in which case it will automatically |
| 90 | be wrapped with autoserv control commands and run with |
| 91 | autoserv. If the file name ends with .srv the wrapping |
| 92 | procedure will be skipped and the autoserv file will be |
| 93 | run directly. |
| 94 | |
| 95 | Return: |
| 96 | The return code from the autoserv process. |
| 97 | """ |
| 98 | # Make sure all the output directories are all setup |
mbligh | 8dcb745 | 2007-09-30 01:29:22 +0000 | [diff] [blame] | 99 | results = os.path.join(resultsdir, queuename + '-' + control) |
mbligh | dcc0499 | 2007-07-26 19:42:55 +0000 | [diff] [blame] | 100 | if os.path.exists(results): |
| 101 | print "Resultsdir %s already present, " % results, |
| 102 | results = "%s.%d" % (results, int(time.time())) |
| 103 | print "changing to %s" % results |
| 104 | os.mkdir(results) |
| 105 | debug = os.path.join(results, 'debug') |
| 106 | os.mkdir(debug) |
| 107 | |
| 108 | # If this is an autoserv file then don't create the wrapper control |
| 109 | is_autoserv_ctl = control.endswith('.srv') |
| 110 | control_path = os.path.abspath(os.path.join(spooldir, control)) |
| 111 | # Otherwise create a tmp autoserv file just to launch the AT ctl file |
| 112 | if not is_autoserv_ctl: |
mbligh | 88d3256 | 2007-09-11 20:18:58 +0000 | [diff] [blame] | 113 | if len(machines) > 1: |
mbligh | b7ef301 | 2007-10-01 18:29:36 +0000 | [diff] [blame] | 114 | # Run autotest file on *all* machines in *parallel* |
mbligh | 88d3256 | 2007-09-11 20:18:58 +0000 | [diff] [blame] | 115 | template = SERV_MULTI |
| 116 | else: |
mbligh | b7ef301 | 2007-10-01 18:29:36 +0000 | [diff] [blame] | 117 | # Run autotest file on *one* machine |
mbligh | 88d3256 | 2007-09-11 20:18:58 +0000 | [diff] [blame] | 118 | template = SERV_SINGLE |
| 119 | control_path = __create_autoserv_wrapper(template, |
| 120 | control_path, |
| 121 | results) |
mbligh | dcc0499 | 2007-07-26 19:42:55 +0000 | [diff] [blame] | 122 | |
| 123 | # Now run the job |
mbligh | 8dcb745 | 2007-09-30 01:29:22 +0000 | [diff] [blame] | 124 | autoserv_exe = os.path.join(scheduler_dir, '..', 'server', 'autoserv') |
| 125 | autoserv_exe = os.path.abspath(autoserv_exe) |
| 126 | |
| 127 | autoserv_cmd = ' '.join([autoserv_exe, '-m', ','.join(machines), |
| 128 | control_path]) |
mbligh | 88d3256 | 2007-09-11 20:18:58 +0000 | [diff] [blame] | 129 | |
| 130 | print "Starting job: %s" % control |
| 131 | print autoserv_cmd |
mbligh | dcc0499 | 2007-07-26 19:42:55 +0000 | [diff] [blame] | 132 | |
mbligh | 6203ace | 2007-10-04 21:54:24 +0000 | [diff] [blame] | 133 | open(os.path.join(debug, 'autoserv.cmd'), 'w', 0).write(autoserv_cmd + '\n') |
| 134 | autoserv_stdout = open(os.path.join(debug, 'autoserv.stdout'), 'w', 0) |
| 135 | autoserv_stderr = open(os.path.join(debug, 'autoserv.stderr'), 'w', 0) |
| 136 | p = Popen(autoserv_cmd, shell=True, stdout=autoserv_stdout, |
| 137 | stderr=autoserv_stderr, cwd=results) |
mbligh | dcc0499 | 2007-07-26 19:42:55 +0000 | [diff] [blame] | 138 | (pid, ret) = os.waitpid(p.pid, 0) |
mbligh | 6203ace | 2007-10-04 21:54:24 +0000 | [diff] [blame] | 139 | autoserv_stdout.close() |
| 140 | autoserv_stderr.close() |
mbligh | dcc0499 | 2007-07-26 19:42:55 +0000 | [diff] [blame] | 141 | |
| 142 | # If this was a tempfile then clean it up |
| 143 | if not is_autoserv_ctl: |
| 144 | os.unlink(control_path) |
| 145 | print "Completed job: %s (%d) " % (control, ret) |
| 146 | |
| 147 | return ret |
| 148 | |
| 149 | |
mbligh | 8dcb745 | 2007-09-30 01:29:22 +0000 | [diff] [blame] | 150 | scheduler_dir = os.path.dirname(os.path.abspath(sys.argv[0])) |
mbligh | dcc0499 | 2007-07-26 19:42:55 +0000 | [diff] [blame] | 151 | os.chdir(spooldir) |
| 152 | print "monitoring spool directory: " + spooldir |
| 153 | while True: |
| 154 | jobs = [j for j in os.listdir(spooldir) if not j.startswith('.')] |
| 155 | next_job = pick_job(jobs) |
| 156 | if not next_job: |
| 157 | time.sleep(10) |
| 158 | continue |
mbligh | 8dcb745 | 2007-09-30 01:29:22 +0000 | [diff] [blame] | 159 | ret = run_job(next_job, os.path.basename(spooldir), scheduler_dir) |
mbligh | dcc0499 | 2007-07-26 19:42:55 +0000 | [diff] [blame] | 160 | os.remove(next_job) |