client/bin/cpuset.py - platform/external/autotest - Gitiles

 __author__ = """Copyright Google, Peter Dahl, Martin J. Bligh   2007"""

 import os, sys, re, glob, math
 from autotest_utils import *

 # Convert '1-3,7,9-12' to [1,2,3,7,9,10,11,12]
 def rangelist_to_list(rangelist):
 	"""Expand a comma-separated range list into a flat list of ints.

 	Each comma-separated item is either a single number ('7') or an
 	inclusive span ('9-12'), so '1-3,7' becomes [1, 2, 3, 7].
 	An empty or None rangelist gives an empty list.
 	Raises ValueError for an item that has neither form.
 	"""
 	expanded = []
 	if rangelist:
 		for item in rangelist.split(','):
 			if re.match(r'^(\d+)$', item):
 				# lone number
 				expanded.append(int(item))
 			else:
 				span = re.match(r'^(\d+)-(\d+)$', item)
 				if span is None:
 					raise ValueError(
 						'Cannot understand data input: %s %s'
 						% (item, rangelist))
 				first, last = [int(g) for g in span.groups()]
 				# both ends of the span are included
 				expanded.extend(range(first, last + 1))
 	return expanded

 def rounded_memtotal(usable_Kbytes=None):
 	"""Estimate the installed physical memory, in Kbytes.

 	usable_Kbytes is the system's usable DRAM in Kbytes; when it is
 	omitted (None) the value is taken from memtotal().  That figure
 	is what remains after Linux deducts roughly 1.5% to 5.1% for
 	system table overhead.  The unknown actual deduction is undone by
 	rounding up to the next small multiple of a big power of two,
 	eg 12GB - 5.1% gets rounded back up to 12GB.

 	Returns the rounded-up size in Kbytes as an int.
 	"""
 	if usable_Kbytes is None:
 		# Get total of all physical mem, in Kbytes
 		usable_Kbytes = memtotal()
 	mindeduct = 0.015  # 1.5 percent
 	maxdeduct = 0.055  # 5.5 percent
 	# deduction range 1.5% .. 5.5% supports physical mem sizes
 	#    6GB .. 12GB in steps of .5GB
 	#   12GB .. 24GB in steps of 1 GB
 	#   24GB .. 48GB in steps of 2 GB ...
 	# Finer granularity in physical mem sizes would require
 	#   tighter spread between min and max possible deductions

 	# increase mem size by at least min deduction, without rounding
 	min_Kbytes = int(usable_Kbytes / (1.0 - mindeduct))
 	# increase mem size further by 2**n rounding, by 0..roundKb or more
 	round_Kbytes = int(usable_Kbytes / (1.0 - maxdeduct)) - min_Kbytes
 	# find least binary roundup 2**n that covers worst-case roundKb.
 	# Integer doubling is exact; a float log(x, 2) can land just above
 	#   a whole number (doubling the step) and fails for x == 0.
 	mod2n = 1
 	while mod2n < round_Kbytes:
 		mod2n <<= 1
 	# have round_Kbytes <= mod2n < round_Kbytes*2
 	# round min_Kbytes up to next multiple of mod2n
 	phys_Kbytes = min_Kbytes + mod2n - 1
 	phys_Kbytes -= phys_Kbytes % mod2n  # clear low bits
 	return phys_Kbytes

 class cpuset:
 	"""A container directory in the cpuset filesystem at /dev/cpuset.

 	Constructing an instance creates the container below its parent,
 	reserves memory nodes and cpus for it, and moves job_pid into it.
 	Helper names such as read_one_line, write_one_line, numa_nodes and
 	AutotestError are not defined here; presumably they come from the
 	autotest_utils star import.
 	"""

 	def get_tasks(self, setname):
 		"""Return the lines of setname/tasks, right-stripped."""
 		tasks_file = open(setname + '/tasks')
 		try:
 			return [line.rstrip() for line in tasks_file.readlines()]
 		finally:
 			# close explicitly instead of waiting for garbage collection
 			tasks_file.close()

 	def print_one_cpuset(self, name):
 		"""Print size, tasks, cpus and mems of the cpuset called name.

 		name is joined onto /dev/cpuset; an absolute name replaces
 		that prefix (os.path.join semantics).
 		"""
 		cpuset_dir = os.path.join('/dev/cpuset', name)
 		cpus = read_one_line(cpuset_dir + '/cpus')
 		mems = read_one_line(cpuset_dir + '/mems')
 		# total memory is divided evenly over the numa nodes;
 		#   // keeps the integer result that / gave on ints
 		node_size_ = rounded_memtotal()*1024 // len(numa_nodes())
 		total_bytes = node_size_ * len(rangelist_to_list(mems))
 		tasks = ','.join(self.get_tasks(cpuset_dir))
 		print("cpuset %s: size %s; tasks %s; cpus %s; mems %s" %
 			(name, human_format(total_bytes), tasks, cpus, mems))

 	def print_all_cpusets(self):
 		"""Print every cpuset directory directly below /dev/cpuset."""
 		for path in glob.glob('/dev/cpuset/*'):
 			# the glob also matches plain control files (cpus, mems,
 			#   tasks ...); only directories are cpusets
 			if os.path.isdir(path):
 				self.print_one_cpuset(os.path.basename(path))


 	def display(self):
 		"""Print this container's own size, tasks, cpus and mems."""
 		self.print_one_cpuset(os.path.join(self.root, self.name))

 	def get_mems(self, setname):
 		"""Return the mem node numbers listed in setname/mems.

 		Returns an empty list when that file does not exist.
 		"""
 		file_name = os.path.join(setname, "mems")
 		if not os.path.exists(file_name):
 			return []
 		return rangelist_to_list(read_one_line(file_name))

 	# Start with the nodes available one level up in the cpuset tree,
 	#   subtract off nodes of all siblings at this level.
 	def available_mems(self, parent_nodes):
 		"""Return the sorted parent_nodes not held by any sibling."""
 		available = set(parent_nodes)
 		for mems_file in glob.glob('%s/*/mems' % self.root):
 			sibling = os.path.dirname(mems_file)
 			available -= set(self.get_mems(sibling))
 		# sorted, so that slicing the tail picks a well-defined set
 		return sorted(available)

 	def release(self, job_pid=None):
 		"""Move remaining tasks to the parent and delete the container.

 		job_pid arg is no longer needed.
 		Raises AutotestError if the directory still exists afterwards.
 		"""
 		print("releasing  %s" % self.cpudir)
 		parent_t = os.path.join(self.root, 'tasks')
 		# Transfer survivors (and self) to parent
 		for task in self.get_tasks(self.cpudir):
 			write_one_line(parent_t, task)
 		os.rmdir(self.cpudir)
 		if os.path.exists(self.cpudir):
 			raise AutotestError('Could not delete container '
 						+ self.cpudir)


 	def __init__(self, name, job_size, job_pid, cpus=None,
 			root="", cleanup=1):
 		"""Create a cpuset container and move job_pid into it.

 		name = arbitrary string tag; also the container directory name
 		job_size = requested memory for job in megabytes;
 			a false value means all installed memory
 		job_pid = pid of job we're putting into the container
 		cpus = list of cpus to allocate; None means every cpu
 		root = parent container, relative to /dev/cpuset
 		cleanup = 1, set notify_on_release (unimplemented)

 		Raises AutotestError when cpuset support, the root container
 		or the parent container is missing, or when the needed memory
 		nodes cannot be obtained.
 		"""
 		self.super_root = "/dev/cpuset"
 		self.root = os.path.join(self.super_root, root)
 		self.name = name
 		#
 		memtotal_Mbytes = rounded_memtotal() >> 10
 		if not job_size:  # default to all installed memory
 			job_size = memtotal_Mbytes
 		print("cpuset(name=%s, root=%s, job_size=%d, pid=%d)" %
 		    (name, root, job_size, job_pid))
 		self.memory = job_size
 		# Convert jobsize to bytes
 		job_size = job_size << 20
 		if not grep('cpuset', '/proc/filesystems'):
 			raise AutotestError('No cpuset support; please reboot')
 		if not os.path.exists(self.super_root):
 			os.mkdir(self.super_root)
 			system('mount -t cpuset none %s' % self.super_root)
 		if not os.path.exists(os.path.join(self.super_root, "cpus")):
 			raise AutotestError('Root container /dev/cpuset is '
 						'empty; please reboot')
 		if not os.path.exists(self.root):
 			raise AutotestError('Parent container %s does not exist'
 						 % self.root)
 		if cpus is None:
 			cpus = range(0, count_cpus())
 		self.cpus = cpus
 		all_nodes = numa_nodes()

 		self.cpudir = os.path.join(self.root, name)
 		if os.path.exists(self.cpudir):
 			self.release()   # destructively replace old

 		# whole nodes are handed out, so round the request up
 		node_size = ((memtotal_Mbytes<<20)*1.0) / len(all_nodes)
 		nodes_needed = int(math.ceil((1.0*job_size) /
 					     math.ceil(node_size)))
 		if nodes_needed > len(all_nodes):
 			raise AutotestError("Container's memory is bigger "
 						"than entire machine")
 		parent_nodes = self.get_mems(self.root)
 		if nodes_needed > len(parent_nodes):
 			raise AutotestError("Container's memory is bigger "
 						"than parent's")

 		while True:
 			# Pick specific free mem nodes for this cpuset
 			mems = self.available_mems(parent_nodes)
 			if len(mems) < nodes_needed:
 				raise AutotestError('Existing containers hold '
 					'mem nodes needed by new container')
 			mems = mems[-nodes_needed:]
 			mems_spec = ','.join(['%d' % x for x in mems])
 			os.mkdir(self.cpudir)
 			write_one_line(os.path.join(self.cpudir,
 					'mem_exclusive'), '1')
 			write_one_line(os.path.join(self.cpudir, 'mems'),
 					mems_spec)
 			# Above sends err msg to client.log.0, but no exception,
 			#   if mems_spec contained any now-taken nodes
 			# Confirm that siblings didn't grab our chosen mems:
 			nodes_gotten = len(self.get_mems(self.cpudir))
 			if nodes_gotten >= nodes_needed:
 				break   # success
 			print("cpuset %s lost race for nodes %s" %
 				(name, mems_spec))
 			# Return any mem we did get, and try again
 			os.rmdir(self.cpudir)

 		# add specified cpu cores and own task pid to container:
 		cpu_spec = ','.join(['%d' % x for x in cpus])
 		write_one_line(os.path.join(self.cpudir, 'cpus'), cpu_spec)
 		write_one_line(os.path.join(self.cpudir, 'tasks'),
 				"%d" % job_pid)
 		self.display()
	__author__ = """Copyright Google, Peter Dahl, Martin J. Bligh 2007"""

	import os, sys, re, glob, math
	from autotest_utils import *

	# Convert '1-3,7,9-12' to [1,2,3,7,9,10,11,12]
	def rangelist_to_list(rangelist):
		"""Convert a range list such as '1-3,7,9-12' to a list of ints.

		Each comma-separated item is a single number or an inclusive
		'start-end' span.  An empty or None rangelist gives [].
		Raises ValueError for an item that has neither form.
		"""
		result = []
		if not rangelist:
			return result
		for x in rangelist.split(','):
			if re.match(r'^(\d+)$', x):
				result.append(int(x))
				continue
			m = re.match(r'^(\d+)-(\d+)$', x)
			if m:
				start = int(m.group(1))
				end = int(m.group(2))
				# both ends of the span are included
				result.extend(range(start, end + 1))
				continue
			msg = 'Cannot understand data input: %s %s' % (x, rangelist)
			raise ValueError(msg)
		return result

	def rounded_memtotal(usable_Kbytes=None):
		"""Estimate the installed physical memory, in Kbytes.

		usable_Kbytes is the system's usable DRAM in Kbytes; when it
		is omitted (None) the value is taken from memtotal().  That
		figure is what remains after Linux deducts roughly 1.5% to
		5.1% for system table overhead.  The unknown actual deduction
		is undone by rounding up to the next small multiple of a big
		power of two, eg 12GB - 5.1% gets rounded back up to 12GB.

		Returns the rounded-up size in Kbytes as an int.
		"""
		if usable_Kbytes is None:
			# Get total of all physical mem, in Kbytes
			usable_Kbytes = memtotal()
		mindeduct = 0.015 # 1.5 percent
		maxdeduct = 0.055 # 5.5 percent
		# deduction range 1.5% .. 5.5% supports physical mem sizes
		#    6GB .. 12GB in steps of .5GB
		#   12GB .. 24GB in steps of 1 GB
		#   24GB .. 48GB in steps of 2 GB ...
		# Finer granularity in physical mem sizes would require
		#   tighter spread between min and max possible deductions

		# increase mem size by at least min deduction, without rounding
		min_Kbytes = int(usable_Kbytes / (1.0 - mindeduct))
		# increase mem size further by 2**n rounding, by 0..roundKb or more
		round_Kbytes = int(usable_Kbytes / (1.0 - maxdeduct)) - min_Kbytes
		# find least binary roundup 2**n that covers worst-case roundKb.
		# Integer doubling is exact; a float log(x, 2) can land just
		#   above a whole number (doubling the step) and fails for x == 0.
		mod2n = 1
		while mod2n < round_Kbytes:
			mod2n <<= 1
		# have round_Kbytes <= mod2n < round_Kbytes*2
		# round min_Kbytes up to next multiple of mod2n
		phys_Kbytes = min_Kbytes + mod2n - 1
		phys_Kbytes -= phys_Kbytes % mod2n # clear low bits
		return phys_Kbytes

	class cpuset:
		"""A container directory in the cpuset filesystem at /dev/cpuset.

		Constructing an instance creates the container below its
		parent, reserves memory nodes and cpus for it, and moves
		job_pid into it.  Helper names such as read_one_line,
		write_one_line, numa_nodes and AutotestError are not defined
		here; presumably they come from the autotest_utils star import.
		"""

		def get_tasks(self, setname):
			"""Return the lines of setname/tasks, right-stripped."""
			tasks_file = open(setname + '/tasks')
			try:
				return [line.rstrip() for line in tasks_file.readlines()]
			finally:
				# close explicitly instead of waiting for garbage collection
				tasks_file.close()

		def print_one_cpuset(self, name):
			"""Print size, tasks, cpus and mems of the cpuset called name.

			name is joined onto /dev/cpuset; an absolute name replaces
			that prefix (os.path.join semantics).
			"""
			cpuset_dir = os.path.join('/dev/cpuset', name)
			cpus = read_one_line(cpuset_dir + '/cpus')
			mems = read_one_line(cpuset_dir + '/mems')
			# total memory is divided evenly over the numa nodes;
			#   // keeps the integer result that / gave on ints
			node_size_ = rounded_memtotal()*1024 // len(numa_nodes())
			total_bytes = node_size_ * len(rangelist_to_list(mems))
			tasks = ','.join(self.get_tasks(cpuset_dir))
			print("cpuset %s: size %s; tasks %s; cpus %s; mems %s" %
				(name, human_format(total_bytes), tasks, cpus, mems))

		def print_all_cpusets(self):
			"""Print every cpuset directory directly below /dev/cpuset."""
			for path in glob.glob('/dev/cpuset/*'):
				# the glob also matches plain control files (cpus, mems,
				#   tasks ...); only directories are cpusets
				if os.path.isdir(path):
					self.print_one_cpuset(os.path.basename(path))


		def display(self):
			"""Print this container's own size, tasks, cpus and mems."""
			self.print_one_cpuset(os.path.join(self.root, self.name))

		def get_mems(self, setname):
			"""Return the mem node numbers listed in setname/mems.

			Returns an empty list when that file does not exist.
			"""
			file_name = os.path.join(setname, "mems")
			if not os.path.exists(file_name):
				return []
			return rangelist_to_list(read_one_line(file_name))

		# Start with the nodes available one level up in the cpuset tree,
		#   subtract off nodes of all siblings at this level.
		def available_mems(self, parent_nodes):
			"""Return the sorted parent_nodes not held by any sibling."""
			available = set(parent_nodes)
			for mems_file in glob.glob('%s/*/mems' % self.root):
				sibling = os.path.dirname(mems_file)
				available -= set(self.get_mems(sibling))
			# sorted, so that slicing the tail picks a well-defined set
			return sorted(available)

		def release(self, job_pid=None):
			"""Move remaining tasks to the parent and delete the container.

			job_pid arg is no longer needed.
			Raises AutotestError if the directory still exists afterwards.
			"""
			print("releasing  %s" % self.cpudir)
			parent_t = os.path.join(self.root, 'tasks')
			# Transfer survivors (and self) to parent
			for task in self.get_tasks(self.cpudir):
				write_one_line(parent_t, task)
			os.rmdir(self.cpudir)
			if os.path.exists(self.cpudir):
				raise AutotestError('Could not delete container '
							+ self.cpudir)


		def __init__(self, name, job_size, job_pid, cpus=None,
				root="", cleanup=1):
			"""Create a cpuset container and move job_pid into it.

			name = arbitrary string tag; also the container directory name
			job_size = requested memory for job in megabytes;
				a false value means all installed memory
			job_pid = pid of job we're putting into the container
			cpus = list of cpus to allocate; None means every cpu
			root = parent container, relative to /dev/cpuset
			cleanup = 1, set notify_on_release (unimplemented)

			Raises AutotestError when cpuset support, the root container
			or the parent container is missing, or when the needed
			memory nodes cannot be obtained.
			"""
			self.super_root = "/dev/cpuset"
			self.root = os.path.join(self.super_root, root)
			self.name = name
			#
			memtotal_Mbytes = rounded_memtotal() >> 10
			if not job_size: # default to all installed memory
				job_size = memtotal_Mbytes
			print("cpuset(name=%s, root=%s, job_size=%d, pid=%d)" %
				(name, root, job_size, job_pid))
			self.memory = job_size
			# Convert jobsize to bytes
			job_size = job_size << 20
			if not grep('cpuset', '/proc/filesystems'):
				raise AutotestError('No cpuset support; please reboot')
			if not os.path.exists(self.super_root):
				os.mkdir(self.super_root)
				system('mount -t cpuset none %s' % self.super_root)
			if not os.path.exists(os.path.join(self.super_root, "cpus")):
				raise AutotestError('Root container /dev/cpuset is '
							'empty; please reboot')
			if not os.path.exists(self.root):
				raise AutotestError('Parent container %s does not exist'
							% self.root)
			if cpus is None:
				cpus = range(0, count_cpus())
			self.cpus = cpus
			all_nodes = numa_nodes()

			self.cpudir = os.path.join(self.root, name)
			if os.path.exists(self.cpudir):
				self.release() # destructively replace old

			# whole nodes are handed out, so round the request up
			node_size = ((memtotal_Mbytes<<20)*1.0) / len(all_nodes)
			nodes_needed = int(math.ceil((1.0*job_size) /
						math.ceil(node_size)))
			if nodes_needed > len(all_nodes):
				raise AutotestError("Container's memory is bigger "
							"than entire machine")
			parent_nodes = self.get_mems(self.root)
			if nodes_needed > len(parent_nodes):
				raise AutotestError("Container's memory is bigger "
							"than parent's")

			while True:
				# Pick specific free mem nodes for this cpuset
				mems = self.available_mems(parent_nodes)
				if len(mems) < nodes_needed:
					raise AutotestError('Existing containers hold '
						'mem nodes needed by new container')
				mems = mems[-nodes_needed:]
				mems_spec = ','.join(['%d' % x for x in mems])
				os.mkdir(self.cpudir)
				write_one_line(os.path.join(self.cpudir,
						'mem_exclusive'), '1')
				write_one_line(os.path.join(self.cpudir, 'mems'),
						mems_spec)
				# Above sends err msg to client.log.0, but no exception,
				#   if mems_spec contained any now-taken nodes
				# Confirm that siblings didn't grab our chosen mems:
				nodes_gotten = len(self.get_mems(self.cpudir))
				if nodes_gotten >= nodes_needed:
					break # success
				print("cpuset %s lost race for nodes %s" %
					(name, mems_spec))
				# Return any mem we did get, and try again
				os.rmdir(self.cpudir)

			# add specified cpu cores and own task pid to container:
			cpu_spec = ','.join(['%d' % x for x in cpus])
			write_one_line(os.path.join(self.cpudir, 'cpus'), cpu_spec)
			write_one_line(os.path.join(self.cpudir, 'tasks'),
					"%d" % job_pid)
			self.display()