import os, select
import kvm_utils, kvm_vm, kvm_subprocess


class scheduler:
    """
    A scheduler that manages several parallel test execution pipelines on a
    single host.
    """

    def __init__(self, tests, num_workers, total_cpus, total_mem, bindir):
        """
        Initialize the class.

        @param tests: A list of test dictionaries.
        @param num_workers: The number of workers (pipelines).
        @param total_cpus: The total number of CPUs to dedicate to tests.
        @param total_mem: The total amount of memory to dedicate to tests.
        @param bindir: The directory where environment files reside.
        """
        self.tests = tests
        self.num_workers = num_workers
        self.total_cpus = total_cpus
        self.total_mem = total_mem
        self.bindir = bindir
        # Pipes -- s stands for scheduler, w stands for worker
        self.s2w = [os.pipe() for i in range(num_workers)]
        self.w2s = [os.pipe() for i in range(num_workers)]
        self.s2w_r = [os.fdopen(r, "r", 0) for r, w in self.s2w]
        self.s2w_w = [os.fdopen(w, "w", 0) for r, w in self.s2w]
        self.w2s_r = [os.fdopen(r, "r", 0) for r, w in self.w2s]
        self.w2s_w = [os.fdopen(w, "w", 0) for r, w in self.w2s]
        # "Personal" worker dicts contain modifications that are applied
        # specifically to each worker.  For example, each worker must use a
        # different environment file and a different MAC address pool.
        self.worker_dicts = [{"env": "env%d" % i} for i in range(num_workers)]


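    # Message protocol (one newline-terminated message per line):
    #   worker -> scheduler:  "ready", "done <test_index> <status>",
    #                         "cleanup_done"
    #   scheduler -> worker:  "run <test_index>", "cleanup", "terminate"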
    def worker(self, index, run_test_func):
        """
        The worker function.

        Waits for commands from the scheduler and processes them.

        @param index: The index of this worker (in the range 0..num_workers-1).
        @param run_test_func: A function to be called to run a test
                (e.g. job.run_test).
        """
        r = self.s2w_r[index]
        w = self.w2s_w[index]
        self_dict = self.worker_dicts[index]

        # Inform the scheduler this worker is ready
        w.write("ready\n")

        while True:
            line = r.readline()
            if not line:
                # EOF -- the scheduler went away without saying "terminate"
                break
            cmd = line.split()
            if not cmd:
                continue

            # The scheduler wants this worker to run a test
            if cmd[0] == "run":
                test_index = int(cmd[1])
                test = self.tests[test_index].copy()
                test.update(self_dict)
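                # Carve out this worker's share of any pooled resources
                # (e.g. the MAC address pool mentioned above)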
                test = kvm_utils.get_sub_pool(test, index, self.num_workers)
                test_iterations = int(test.get("iterations", 1))
                status = run_test_func("kvm", params=test,
                                       tag=test.get("shortname"),
                                       iterations=test_iterations)
                w.write("done %s %s\n" % (test_index, status))
                w.write("ready\n")

            # The scheduler wants this worker to free its used resources
            elif cmd[0] == "cleanup":
                env_filename = os.path.join(self.bindir, self_dict["env"])
                env = kvm_utils.load_env(env_filename, {})
                for obj in env.values():
                    if isinstance(obj, kvm_vm.VM):
                        obj.destroy()
                    elif isinstance(obj, kvm_subprocess.kvm_spawn):
                        obj.close()
                kvm_utils.dump_env(env, env_filename)
                w.write("cleanup_done\n")
                w.write("ready\n")

            # There's no more work for this worker
            elif cmd[0] == "terminate":
                break


    def scheduler(self):
        """
        The scheduler function.

        Sends commands to workers, telling them to run tests, clean up or
        terminate execution.
        """
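        # idle_workers: workers waiting to be given a test
        # closing_workers: workers currently freeing their resources
        # test_status: "waiting", "running", "pass" or "fail" per test
        # test_worker: the worker each test is assigned to (or None)
        # used_cpus/used_mem: resources currently held by each worker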
        idle_workers = []
        closing_workers = []
        test_status = ["waiting"] * len(self.tests)
        test_worker = [None] * len(self.tests)
        used_cpus = [0] * self.num_workers
        used_mem = [0] * self.num_workers

        while True:
            # Wait for a message from a worker
            r, w, x = select.select(self.w2s_r, [], [])

            someone_is_ready = False

            for pipe in r:
                worker_index = self.w2s_r.index(pipe)
                msg = pipe.readline().split()
                if not msg:
                    continue

                # A worker is ready -- add it to the idle_workers list
                if msg[0] == "ready":
                    idle_workers.append(worker_index)
                    someone_is_ready = True

                # A worker completed a test
                elif msg[0] == "done":
                    test_index = int(msg[1])
                    test = self.tests[test_index]
                    # run_test_func() returns a boolean, which arrives here
                    # as the string "True" or "False"; compare rather than
                    # eval() data read from a pipe
                    status = (msg[2] == "True")
                    test_status[test_index] = ("fail", "pass")[status]
                    # If the test failed, mark all tests that depend on it
                    # as "fail" too
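                    # (dependencies match by substring: a test listing
                    # "install" as a dependency depends on any test whose
                    # name contains "install")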
                    if not status:
                        for i, other_test in enumerate(self.tests):
                            for dep in other_test.get("depend", []):
                                if dep in test["name"]:
                                    test_status[i] = "fail"

                # A worker is done shutting down its VMs and other processes
                elif msg[0] == "cleanup_done":
                    used_cpus[worker_index] = 0
                    used_mem[worker_index] = 0
                    closing_workers.remove(worker_index)

            if not someone_is_ready:
                continue

            for worker in idle_workers[:]:
                # Find a test for this worker
                test_found = False
                for i, test in enumerate(self.tests):
                    # We only want "waiting" tests
                    if test_status[i] != "waiting":
                        continue
                    # Make sure the test isn't assigned to another worker
                    if test_worker[i] is not None and test_worker[i] != worker:
                        continue
                    # Make sure the test's dependencies are satisfied
                    dependencies_satisfied = True
                    for dep in test.get("depend", []):
                        dependencies = [j for j, t in enumerate(self.tests)
                                        if dep in t["name"]]
                        bad_status_deps = [j for j in dependencies
                                           if test_status[j] != "pass"]
                        if bad_status_deps:
                            dependencies_satisfied = False
                            break
                    if not dependencies_satisfied:
                        continue
                    # Make sure we have enough resources to run the test
                    test_used_cpus = int(test.get("used_cpus", 1))
                    test_used_mem = int(test.get("used_mem", 128))
                    # First make sure the other workers aren't using too many
                    # CPUs (not including the workers currently shutting down)
                    uc = (sum(used_cpus) - used_cpus[worker] -
                          sum(used_cpus[i] for i in closing_workers))
                    if uc and uc + test_used_cpus > self.total_cpus:
                        continue
                    # ... or too much memory
                    um = (sum(used_mem) - used_mem[worker] -
                          sum(used_mem[i] for i in closing_workers))
                    if um and um + test_used_mem > self.total_mem:
                        continue
                    # If we reached this point it means there are, or will
                    # soon be, enough resources to run the test
                    test_found = True
                    # Now check if the test can be run right now, i.e. if the
                    # other workers, including the ones currently shutting
                    # down, aren't using too many CPUs
                    uc = (sum(used_cpus) - used_cpus[worker])
                    if uc and uc + test_used_cpus > self.total_cpus:
                        continue
                    # ... or too much memory
                    um = (sum(used_mem) - used_mem[worker])
                    if um and um + test_used_mem > self.total_mem:
                        continue
                    # Everything is OK -- run the test
                    test_status[i] = "running"
                    test_worker[i] = worker
                    idle_workers.remove(worker)
                    # Update used_cpus and used_mem
                    used_cpus[worker] = test_used_cpus
                    used_mem[worker] = test_used_mem
                    # Assign all related tests to this worker
                    for j, other_test in enumerate(self.tests):
                        for other_dep in other_test.get("depend", []):
                            # All tests that depend on this test
                            if other_dep in test["name"]:
                                test_worker[j] = worker
                                break
                            # ... and all tests that share a dependency
                            # with this test
                            for dep in test.get("depend", []):
                                if dep in other_dep or other_dep in dep:
                                    test_worker[j] = worker
                                    break
                    # Tell the worker to run the test
                    self.s2w_w[worker].write("run %s\n" % i)
                    break

                # If there won't be any tests for this worker to run soon,
                # tell the worker to free its used resources
                if not test_found and (used_cpus[worker] or used_mem[worker]):
                    self.s2w_w[worker].write("cleanup\n")
                    idle_workers.remove(worker)
                    closing_workers.append(worker)

            # If there are no more new tests to run, terminate the workers
            # and the scheduler
            if len(idle_workers) == self.num_workers:
                for worker in idle_workers:
                    self.s2w_w[worker].write("terminate\n")
                break
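

# Minimal usage sketch (hypothetical; in practice the autotest control file
# wires this up).  Assumes `tests` is a list of test param dicts and that
# job.run_test and test.bindir are available in the calling context:
#
#     s = scheduler(tests, num_workers=4, total_cpus=4,
#                   total_mem=4096, bindir=test.bindir)
#     for i in range(s.num_workers):
#         if os.fork() == 0:          # child process becomes a worker
#             s.worker(i, job.run_test)
#             os._exit(0)
#     s.scheduler()                   # parent runs the scheduler loop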