__author__ = """Copyright Google, Peter Dahl, Martin J. Bligh 2007"""
import os, sys, re, glob, math
from autotest_utils import *
# Convert '1-3,7,9-12' to [1,2,3,7,9,10,11,12]
def rangelist_to_list(rangelist):
result = []
if not rangelist:
return result
for x in rangelist.split(','):
if re.match(r'^(\d+)$', x):
result.append(int(x))
continue
m = re.match(r'^(\d+)-(\d+)$', x)
if m:
start = int(m.group(1))
end = int(m.group(2))
result += range(start, end+1)
continue
msg = 'Cannot understand data input: %s %s' % (x, rangelist)
raise ValueError(msg)
return result
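# A minimal self-check for rangelist_to_list (illustrative only: this
# helper is an addition for clarity and is never called automatically):
def _selftest_rangelist_to_list():
	assert rangelist_to_list('1-3,7,9-12') == [1, 2, 3, 7, 9, 10, 11, 12]
	assert rangelist_to_list('') == []
	assert rangelist_to_list(None) == []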
def rounded_memtotal():
# Get total of all physical mem, in Kbytes
usable_Kbytes = memtotal()
# usable_Kbytes is system's usable DRAM in Kbytes,
# as reported by memtotal() from device /proc/meminfo memtotal
# after Linux deducts 1.5% to 5.1% for system table overhead
# Undo the unknown actual deduction by rounding up
# to next small multiple of a big power-of-two
# eg 12GB - 5.1% gets rounded back up to 12GB
mindeduct = 0.015 # 1.5 percent
maxdeduct = 0.055 # 5.5 percent
# deduction range 1.5% .. 5.5% supports physical mem sizes
# 6GB .. 12GB in steps of .5GB
# 12GB .. 24GB in steps of 1 GB
# 24GB .. 48GB in steps of 2 GB ...
# Finer granularity in physical mem sizes would require
# tighter spread between min and max possible deductions
# increase mem size by at least min deduction, without rounding
min_Kbytes = int(usable_Kbytes / (1.0 - mindeduct))
	# increase mem size further by 2**n rounding, by 0..round_Kbytes or more
round_Kbytes = int(usable_Kbytes / (1.0 - maxdeduct)) - min_Kbytes
	# find least binary roundup 2**n that covers worst-case round_Kbytes
mod2n = 1 << int(math.ceil(math.log(round_Kbytes, 2)))
# have round_Kbytes <= mod2n < round_Kbytes*2
# round min_Kbytes up to next multiple of mod2n
phys_Kbytes = min_Kbytes + mod2n - 1
phys_Kbytes = phys_Kbytes - (phys_Kbytes % mod2n) # clear low bits
return phys_Kbytes
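# Worked example with hypothetical numbers: a 12GB machine where Linux
# reports usable_Kbytes = 11941184 (12GB minus ~5.1% overhead):
#   min_Kbytes   = int(11941184 / 0.985)            = 12123029
#   round_Kbytes = int(11941184 / 0.945) - 12123029 = 513144
#   mod2n        = 1 << ceil(log2(513144))          = 524288  (i.e. 512MB)
#   phys_Kbytes  = 12123029 rounded up to a multiple of 524288
#                = 12582912 Kbytes, i.e. exactly 12GB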
class cpuset:
def get_tasks(self, setname):
return [x.rstrip() for x in open(setname+'/tasks').readlines()]
def print_one_cpuset(self, name):
dir = os.path.join('/dev/cpuset', name)
cpus = read_one_line(dir + '/cpus')
mems = read_one_line(dir + '/mems')
node_size_ = rounded_memtotal()*1024 / len(numa_nodes())
		memtotal_bytes = node_size_ * len(rangelist_to_list(mems))
		tasks = ','.join(self.get_tasks(dir))
		print "cpuset %s: size %s; tasks %s; cpus %s; mems %s" % \
			(name, human_format(memtotal_bytes), tasks, cpus, mems)
	def print_all_cpusets(self):
		for cpuset_path in glob.glob('/dev/cpuset/*'):
			self.print_one_cpuset(re.sub(r'.*/', '', cpuset_path))
def display(self):
self.print_one_cpuset(os.path.join(self.root,self.name))
def get_mems(self, setname):
file_name = os.path.join(setname, "mems")
if os.path.exists(file_name):
return rangelist_to_list(read_one_line(file_name))
else:
			return []
# Start with the nodes available one level up in the cpuset tree,
# subtract off nodes of all siblings at this level.
def available_mems(self, parent_nodes):
available = set(parent_nodes)
for sub_cpusets in glob.glob('%s/*/mems' % self.root):
sub_cpusets = os.path.dirname(sub_cpusets)
available -= set(self.get_mems(sub_cpusets))
return list(available)
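	# For example (hypothetical layout): if the parent owns mem nodes
	# [0, 1, 2, 3] and two sibling cpusets already hold nodes 0 and 1,
	# available_mems returns the remaining nodes [2, 3] (as a list built
	# from a set, so element order is not guaranteed).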
def release(self, job_pid=None):
# job_pid arg is no longer needed
		print "releasing", self.cpudir
parent_t = os.path.join(self.root, 'tasks')
# Transfer survivors (and self) to parent
for task in self.get_tasks(self.cpudir):
write_one_line(parent_t, task)
os.rmdir(self.cpudir)
if os.path.exists(self.cpudir):
raise AutotestError('Could not delete container '
+ self.cpudir)
def __init__(self, name, job_size, job_pid, cpus = None,
root = "", cleanup = 1):
# Create a cpuset container and move job_pid into it
# Allocate the list "cpus" of cpus to that container
# name = arbitrary string tag
		# job size = requested memory for job in megabytes
# job pid = pid of job we're putting into the container
# cleanup = 1, set notify_on_release (unimplemented)
self.super_root = "/dev/cpuset"
self.root = os.path.join(self.super_root, root)
self.name = name
#
memtotal_Mbytes = rounded_memtotal() >> 10
if not job_size: # default to all installed memory
job_size = memtotal_Mbytes
print "cpuset(name=%s, root=%s, job_size=%d, pid=%d)" % \
(name, root, job_size, job_pid)
self.memory = job_size
# Convert jobsize to bytes
job_size = job_size << 20
if not grep('cpuset', '/proc/filesystems'):
raise AutotestError('No cpuset support; please reboot')
if not os.path.exists(self.super_root):
os.mkdir(self.super_root)
system('mount -t cpuset none %s' % self.super_root)
if not os.path.exists(os.path.join(self.super_root, "cpus")):
raise AutotestError('Root container /dev/cpuset is '
'empty; please reboot')
if not os.path.exists(self.root):
raise AutotestError('Parent container %s does not exist'
% self.root)
		if cpus is None:
cpus = range(0, count_cpus())
self.cpus = cpus
all_nodes = numa_nodes()
self.cpudir = os.path.join(self.root, name)
if os.path.exists(self.cpudir):
self.release() # destructively replace old
node_size = ((memtotal_Mbytes<<20)*1.0) / len(all_nodes)
nodes_needed = int(math.ceil((1.0*job_size) /
math.ceil(node_size)))
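		# e.g. (hypothetical sizes): a 16GB machine with 4 nodes gives
		# node_size of ~4GB, so a 6GB job needs ceil(6/4) = 2 nodes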
if nodes_needed > len(all_nodes):
raise AutotestError("Container's memory is bigger "
"than entire machine")
parent_nodes = self.get_mems(self.root)
if nodes_needed > len(parent_nodes):
raise AutotestError("Container's memory is bigger "
"than parent's")
while True:
# Pick specific free mem nodes for this cpuset
mems = self.available_mems(parent_nodes)
if len(mems) < nodes_needed:
raise AutotestError('Existing containers hold '
'mem nodes needed by new container')
mems = mems[-nodes_needed:]
mems_spec = ','.join(['%d' % x for x in mems])
os.mkdir(self.cpudir)
write_one_line(os.path.join(self.cpudir,
'mem_exclusive'), '1')
write_one_line(os.path.join(self.cpudir,'mems'),
mems_spec)
			# The write above logs an error to client.log.0 (but raises
			# no exception) if mems_spec contained any now-taken nodes.
# Confirm that siblings didn't grab our chosen mems:
nodes_gotten = len(self.get_mems(self.cpudir))
if nodes_gotten >= nodes_needed:
break # success
print "cpuset %s lost race for nodes" % name, mems_spec
# Return any mem we did get, and try again
os.rmdir(self.cpudir)
# add specified cpu cores and own task pid to container:
cpu_spec = ','.join(['%d' % x for x in cpus])
write_one_line(os.path.join(self.cpudir, 'cpus'), cpu_spec)
write_one_line(os.path.join(self.cpudir, 'tasks'),
"%d" % job_pid)
self.display()
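# Example usage (a sketch, not from the original module; assumes a root
# shell inside an autotest client job, with hypothetical name/size values):
#
#     container = cpuset('mytest', job_size=2048, job_pid=os.getpid())
#     # ... the calling process and its children now run inside the
#     # container, confined to the allocated mem nodes and cpus ...
#     container.release()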