blob: c87c9b9ff49a55a083c2e6ee585c7cc84c97b3a1 [file] [log] [blame]
# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
Ahmad Sharif4467f002012-12-20 12:09:49 -08004
Yunlian Jiang93950172015-12-10 13:20:48 -08005"""Machine Manager module."""
6
7from __future__ import print_function
8
Ahmad Shariff395c262012-10-09 17:48:09 -07009import hashlib
10import image_chromeos
cmtice517dc982015-06-12 12:22:32 -070011import file_lock_machine
Ahmad Shariff395c262012-10-09 17:48:09 -070012import math
13import os.path
Ahmad Sharif4467f002012-12-20 12:09:49 -080014import re
Ahmad Sharif0dcbc4b2012-02-02 16:37:18 -080015import sys
16import threading
17import time
Ahmad Sharif4467f002012-12-20 12:09:49 -080018
cmtice1505b6a2014-06-04 14:19:19 -070019
Yunlian Jiang0d1a9f32015-12-09 10:47:11 -080020from cros_utils import command_executer
21from cros_utils import logger
Ahmad Sharif0dcbc4b2012-02-02 16:37:18 -080022
# File on the DUT that is read (with "cat") when a machine is locked; its
# stripped contents become the machine's image checksum.
CHECKSUM_FILE = "/usr/local/osimage_checksum_file"
24
class BadChecksum(Exception):
  """Raised if all machines for a label don't have the same checksum."""
28
class BadChecksumString(Exception):
  """Raised if all machines for a label don't have the same checksum string."""
Ahmad Sharif0dcbc4b2012-02-02 16:37:18 -080032
class MissingLocksDirectory(Exception):
  """Raised when cannot find/access the machine locks directory."""
cmtice5c09fc22015-04-22 09:25:53 -070035
class CrosCommandError(Exception):
  """Raised when an error occurs running command on DUT."""
Caroline Tice31fedb02015-09-14 16:30:37 -070038
class CrosMachine(object):
  """A single device under test (DUT) and the facts gathered about it.

  Constructing an instance probes the machine through the command executer.
  If the machine answers, its /proc/meminfo and /proc/cpuinfo are read and
  combined into `checksum_string`, from which `machine_checksum` is derived;
  a machine id is also collected and hashed into `machine_id_checksum`.
  If the machine does not answer, `machine_checksum` stays None.

  Args:
    name: Name used to address the machine in executer commands.
    chromeos_root: ChromeOS root passed to every executer command.
    log_level: Log level handed to the default command executer.
    cmd_exec: Optional command executer to use instead of the default one.
  """

  def __init__(self, name, chromeos_root, log_level, cmd_exec=None):
    self.name = name
    self.image = None
    # We relate a dut with a label if we reimage the dut using label or we
    # detect at the very beginning that the dut is running this label.
    self.label = None
    self.checksum = None
    self.locked = False
    self.released_time = time.time()
    self.test_run = None
    self.chromeos_root = chromeos_root
    self.log_level = log_level
    self.cpuinfo = None
    self.machine_id = None
    self.checksum_string = None
    self.meminfo = None
    self.phys_kbytes = None
    # Defined up front so both attributes exist even when the machine turns
    # out to be unreachable and SetUpChecksumInfo returns early.
    self.machine_checksum = None
    self.machine_id_checksum = None
    self.ce = cmd_exec or command_executer.GetCommandExecuter(
        log_level=self.log_level)
    self.SetUpChecksumInfo()

  def SetUpChecksumInfo(self):
    """Gather memory/CPU/id information and compute the checksums.

    If the machine is not reachable, `machine_checksum` is set to None and
    nothing else is touched.
    """
    if not self.IsReachable():
      self.machine_checksum = None
      return
    self._GetMemoryInfo()
    self._GetCPUInfo()
    self._ComputeMachineChecksumString()
    self._GetMachineID()
    self.machine_checksum = self._GetMD5Checksum(self.checksum_string)
    self.machine_id_checksum = self._GetMD5Checksum(self.machine_id)

  def IsReachable(self):
    """Return True if running "ls" on the machine yields a falsy status."""
    ret = self.ce.CrosRunCommand("ls",
                                 machine=self.name,
                                 chromeos_root=self.chromeos_root)
    return not ret

  def _ParseMemoryInfo(self):
    """Estimate physical memory (kB) from the first line of `self.meminfo`.

    The second whitespace-separated field of the first line is taken as the
    usable kilobytes and rounded up to a multiple of a power of two; the
    result is stored in `self.phys_kbytes`.
    """
    line = self.meminfo.splitlines()[0]
    usable_kbytes = int(line.split()[1])
    # This code is from src/third_party/test/files/client/bin/base_utils.py
    # usable_kbytes is system's usable DRAM in kbytes, as reported in
    # /proc/meminfo after Linux deducts some percentage for system table
    # overhead.  Undo the unknown actual deduction by rounding up to the next
    # small multiple of a big power-of-two, e.g. 12GB - 5.1% gets rounded
    # back up to 12GB.
    mindeduct = 0.005  # 0.5 percent
    maxdeduct = 0.095  # 9.5 percent
    # Finer granularity in physical mem sizes would require a tighter spread
    # between the min and max possible deductions.

    # Increase mem size by at least min deduction, without rounding.
    min_kbytes = int(usable_kbytes / (1.0 - mindeduct))
    # Increase mem size further by 2**n rounding, by 0..round_kbytes or more.
    round_kbytes = int(usable_kbytes / (1.0 - maxdeduct)) - min_kbytes
    # Find least binary roundup 2**n that covers worst-case round_kbytes,
    # so that round_kbytes <= mod2n < round_kbytes * 2.
    mod2n = 1 << int(math.ceil(math.log(round_kbytes, 2)))
    # Round min_kbytes up to the next multiple of mod2n.
    phys_kbytes = min_kbytes + mod2n - 1
    phys_kbytes -= phys_kbytes % mod2n  # clear low bits
    self.phys_kbytes = phys_kbytes

  def _GetMemoryInfo(self):
    """Read /proc/meminfo from the machine and parse it."""
    # TODO yunlian: when the machine in rebooting, it will not return
    # meminfo, the assert does not catch it either
    ret, self.meminfo, _ = self.ce.CrosRunCommand(
        "cat /proc/meminfo", return_output=True,
        machine=self.name,
        chromeos_root=self.chromeos_root)
    assert ret == 0, "Could not get meminfo from machine: %s" % self.name
    # The guard still matters when asserts are compiled out (python -O).
    if ret == 0:
      self._ParseMemoryInfo()

  def _GetCPUInfo(self):
    """Read /proc/cpuinfo from the machine into `self.cpuinfo`."""
    ret, self.cpuinfo, _ = self.ce.CrosRunCommand(
        "cat /proc/cpuinfo", return_output=True,
        machine=self.name,
        chromeos_root=self.chromeos_root)
    # The raw text is all that is used later (see
    # _ComputeMachineChecksumString); there is no separate parse step.
    assert ret == 0, "Could not get cpuinfo from machine: %s" % self.name

  def _ComputeMachineChecksumString(self):
    """Build `checksum_string` from stable cpuinfo lines plus memory size.

    Lines containing "MHz", "BogoMIPS" or "bogomips" are left out; the
    remaining lines are concatenated without separators and followed by a
    space and `phys_kbytes`.
    """
    exclude_lines_list = ["MHz", "BogoMIPS", "bogomips"]
    kept_lines = [line for line in self.cpuinfo.splitlines()
                  if not any(e in line for e in exclude_lines_list)]
    self.checksum_string = "".join(kept_lines) + " " + str(self.phys_kbytes)

  def _GetMD5Checksum(self, ss):
    """Return the MD5 hex digest of `ss`, or "" when `ss` is empty/None."""
    if not ss:
      return ""
    # hashlib needs bytes; text is encoded so this also works on Python 3.
    if not isinstance(ss, bytes):
      ss = ss.encode("utf-8")
    return hashlib.md5(ss).hexdigest()

  def _GetMachineID(self):
    """Set `self.machine_id` from the VPD log or, failing that, ifconfig.

    Tried in order: the first "Product" line of `dump_vpd_log`, then all
    "HWaddr" lines of `ifconfig` joined with "_", then all "ether" lines
    joined the same way.  Asserts if none of them yields anything.
    """
    _, vpd_out, _ = self.ce.CrosRunCommand("dump_vpd_log --full --stdout",
                                           return_output=True,
                                           machine=self.name,
                                           chromeos_root=self.chromeos_root)
    product_lines = [l for l in vpd_out.splitlines() if "Product" in l]
    if product_lines:
      self.machine_id = product_lines[0]
      return

    _, if_out, _ = self.ce.CrosRunCommand("ifconfig", return_output=True,
                                          machine=self.name,
                                          chromeos_root=self.chromeos_root)
    if_lines = if_out.splitlines()
    for marker in ("HWaddr", "ether"):
      matches = [l for l in if_lines if marker in l]
      if matches:
        self.machine_id = "_".join(matches)
        return
    assert 0, "Could not get machine_id from machine: %s" % self.name

  def __str__(self):
    fields = [self.name,
              str(self.image),
              str(self.checksum),
              str(self.locked),
              str(self.released_time)]
    return ", ".join(fields)
182
183
class MachineManager(object):
  """Lock, image and unlock machines locally for benchmark runs.

  This class contains methods and calls to lock, unlock and image
  machines and distribute machines to each benchmark run.  The assumption is
  that all of the machines for the experiment have been globally locked
  (using an AFE server) in the ExperimentRunner, but the machines still need
  to be locally locked/unlocked (allocated to benchmark runs) to prevent
  multiple benchmark runs within the same experiment from trying to use the
  same machine at the same time.

  Args:
    chromeos_root: Default ChromeOS root used for commands and imaging.
    acquire_timeout: Seconds AcquireMachine may spend waiting for a machine;
      it is decremented as time is spent waiting.
    log_level: Log level; messages guarded by it are skipped for "verbose".
    locks_dir: Directory for file locks; when falsy no file locking is done.
    cmd_exec: Optional command executer to use instead of the default one.
    lgr: Optional logger to use instead of the default one.

  Raises:
    MissingLocksDirectory: If `locks_dir` is set but is not a directory.
  """

  def __init__(self, chromeos_root, acquire_timeout, log_level, locks_dir,
               cmd_exec=None, lgr=None):
    self._lock = threading.RLock()
    # Every machine added through AddMachine.
    self._all_machines = []
    # Machines this manager has locked (see _TryToLockMachine).
    self._machines = []
    self.image_lock = threading.Lock()
    self.num_reimages = 0
    self.machine_checksum = {}
    self.machine_checksum_string = {}
    self.acquire_timeout = acquire_timeout
    self.log_level = log_level
    self.locks_dir = locks_dir
    self.ce = cmd_exec or command_executer.GetCommandExecuter(
        log_level=self.log_level)
    self.logger = lgr or logger.GetLogger()

    if self.locks_dir and not os.path.isdir(self.locks_dir):
      raise MissingLocksDirectory("Cannot access locks directory: %s"
                                  % self.locks_dir)

    self._initialized_machines = []
    self.chromeos_root = chromeos_root

  def RemoveNonLockedMachines(self, locked_machines):
    """Drop every machine whose name is not in `locked_machines`."""
    # Filter into the existing list objects: removing items from a list
    # while iterating over it skips the element after each removal.
    self._all_machines[:] = [m for m in self._all_machines
                             if m.name in locked_machines]
    self._machines[:] = [m for m in self._machines
                         if m.name in locked_machines]

  def GetChromeVersion(self, machine):
    """Get the version of Chrome running on the DUT.

    Raises:
      CrosCommandError: If the version command exits with a non-zero status.
    """
    cmd = "/opt/google/chrome/chrome --version"
    ret, version, _ = self.ce.CrosRunCommand(cmd, return_output=True,
                                             machine=machine.name,
                                             chromeos_root=self.chromeos_root)
    if ret != 0:
      raise CrosCommandError("Couldn't get Chrome version from %s."
                             % machine.name)
    return version.rstrip()

  def _PushImage(self, image_chromeos_args):
    """Log (unless verbose) and run one image_chromeos.DoImage attempt."""
    if self.log_level != "verbose":
      self.logger.LogOutput("Pushing image onto machine.")
      self.logger.LogOutput("Running image_chromeos.DoImage with %s"
                            % " ".join(image_chromeos_args))
    return image_chromeos.DoImage(image_chromeos_args)

  def ImageMachine(self, machine, label):
    """Image `machine` with the image described by `label`, if needed.

    Nothing is done when the label has a checksum and the machine already
    carries the same one.  Otherwise the image is pushed; if that fails the
    machine is rebooted and the push is retried once after 60 seconds.

    Returns:
      None when no imaging was needed, otherwise the status of the last
      image_chromeos.DoImage call.

    Raises:
      Exception: If the machine could not be imaged after the retry.
    """
    checksum = label.checksum

    if checksum and (machine.checksum == checksum):
      return
    chromeos_root = label.chromeos_root or self.chromeos_root
    image_chromeos_args = [image_chromeos.__file__,
                           "--no_lock",
                           "--chromeos_root=%s" % chromeos_root,
                           "--image=%s" % label.chromeos_image,
                           "--image_args=%s" % label.image_args,
                           "--remote=%s" % machine.name,
                           "--logging_level=%s" % self.log_level]
    if label.board:
      image_chromeos_args.append("--board=%s" % label.board)

    save_ce_log_level = self.ce.log_level
    if self.log_level != "verbose":
      self.ce.log_level = "average"

    try:
      # Currently can't image two machines at once.
      # So have to serialized on this lock.
      with self.image_lock:
        retval = self._PushImage(image_chromeos_args)
        if retval:
          if self.log_level != "verbose":
            self.logger.LogOutput("reboot & exit.")
          self.ce.CrosRunCommand("reboot && exit", machine=machine.name,
                                 chromeos_root=self.chromeos_root)
          time.sleep(60)
          retval = self._PushImage(image_chromeos_args)
        if retval:
          raise Exception("Could not image machine: '%s'." % machine.name)
        self.num_reimages += 1
        machine.checksum = checksum
        machine.image = label.chromeos_image
        machine.label = label

      if not label.chrome_version:
        label.chrome_version = self.GetChromeVersion(machine)
    finally:
      # Restore the executer's log level even when imaging or the version
      # query raised, so the temporary override never leaks.
      self.ce.log_level = save_ce_log_level
    return retval

  def ComputeCommonCheckSum(self, label):
    """Record the checksum shared by all machines of `label`.

    Raises:
      BadChecksum: If two machines of the label have different checksums.
    """
    # Since this is used for cache lookups before the machines have been
    # compared/verified, check here to make sure they all have the same
    # checksum (otherwise the cache lookup may not be valid).
    common_checksum = None
    for machine in self.GetMachines(label):
      # Make sure the machine's checksums are calculated.
      if not machine.machine_checksum:
        machine.SetUpChecksumInfo()
      cs = machine.machine_checksum
      # If this is the first machine we've examined, initialize
      # common_checksum.
      if not common_checksum:
        common_checksum = cs
      # Make sure this machine's checksum matches our 'common' checksum.
      if cs != common_checksum:
        raise BadChecksum("Machine checksums do not match!")
    self.machine_checksum[label.name] = common_checksum

  def ComputeCommonCheckSumString(self, label):
    """Record the first non-empty checksum string among `label`'s machines."""
    # The assumption is that this function is only called AFTER
    # ComputeCommonCheckSum, so there is no need to verify the machines
    # are the same here.  If this is ever changed, this function should be
    # modified to verify that all the machines for a given label are the
    # same.
    for machine in self.GetMachines(label):
      if machine.checksum_string:
        self.machine_checksum_string[label.name] = machine.checksum_string
        break

  def _TryToLockMachine(self, cros_machine):
    """File-lock `cros_machine` (if locks_dir is set) and start tracking it.

    A machine that is already tracked is left alone.  On success the
    machine is appended to the tracked list and its image checksum is read
    from CHECKSUM_FILE on the DUT.
    """
    with self._lock:
      assert cros_machine, "Machine can't be None"
      for m in self._machines:
        if m.name == cros_machine.name:
          return
      locked = True
      if self.locks_dir:
        locked = file_lock_machine.Machine(cros_machine.name,
                                           self.locks_dir).Lock(True,
                                                                sys.argv[0])
      if locked:
        self._machines.append(cros_machine)
        command = "cat %s" % CHECKSUM_FILE
        ret, out, _ = self.ce.CrosRunCommand(
            command, return_output=True, chromeos_root=self.chromeos_root,
            machine=cros_machine.name)
        if ret == 0:
          cros_machine.checksum = out.strip()
      elif self.locks_dir:
        self.logger.LogOutput("Couldn't lock: %s" % cros_machine.name)

  # This is called from single threaded mode.
  def AddMachine(self, machine_name):
    """Create a CrosMachine for `machine_name` and keep it if reachable.

    A machine whose `machine_checksum` is falsy (it could not be reached)
    is silently not added.
    """
    with self._lock:
      for m in self._all_machines:
        assert m.name != machine_name, "Tried to double-add %s" % machine_name

      if self.log_level != "verbose":
        self.logger.LogOutput("Setting up remote access to %s" % machine_name)
        self.logger.LogOutput(
            "Checking machine characteristics for %s" % machine_name)
      cm = CrosMachine(machine_name, self.chromeos_root, self.log_level)
      if cm.machine_checksum:
        self._all_machines.append(cm)

  def RemoveMachine(self, machine_name):
    """Stop tracking `machine_name` and release its file lock, if any."""
    with self._lock:
      self._machines = [m for m in self._machines
                        if m.name != machine_name]
      if self.locks_dir:
        res = file_lock_machine.Machine(machine_name,
                                        self.locks_dir).Unlock(True)
        if not res:
          self.logger.LogError("Could not unlock machine: '%s'."
                               % machine_name)

  def ForceSameImageToAllMachines(self, label):
    """Image every machine of `label` and recompute its checksum info."""
    for m in self.GetMachines(label):
      self.ImageMachine(m, label)
      m.SetUpChecksumInfo()

  def _MarkAcquired(self, machine):
    """Flag `machine` as taken by the current thread and return it."""
    machine.locked = True
    machine.test_run = threading.current_thread()
    return machine

  def AcquireMachine(self, label):
    """Hand out an unlocked machine suitable for `label`, or None.

    Machines of the label are lazily locked first; while none is available
    the call sleeps and decrements `acquire_timeout`, logging a fatal error
    once it drops below zero.  Preference order among unlocked machines:
    one whose checksum equals the label's, then one with no checksum, then
    one released between 8 and 15 seconds ago.
    """
    image_checksum = label.checksum
    machines = self.GetMachines(label)
    check_interval_time = 120
    with self._lock:
      # Lazily external lock machines
      while self.acquire_timeout >= 0:
        for m in machines:
          new_machine = m not in self._all_machines
          self._TryToLockMachine(m)
          if new_machine:
            m.released_time = time.time()
        if self.GetAvailableMachines(label):
          break
        sleep_time = max(1, min(self.acquire_timeout, check_interval_time))
        time.sleep(sleep_time)
        self.acquire_timeout -= sleep_time

      if self.acquire_timeout < 0:
        machine_names = [machine.name for machine in machines]
        self.logger.LogFatal("Could not acquire any of the "
                             "following machines: '%s'"
                             % ", ".join(machine_names))

      # Nothing below changes `locked` without returning, so the candidate
      # list only needs to be computed once.
      candidates = [machine for machine in self.GetAvailableMachines(label)
                    if not machine.locked]
      for m in candidates:
        if image_checksum and (m.checksum == image_checksum):
          return self._MarkAcquired(m)
      for m in candidates:
        if not m.checksum:
          return self._MarkAcquired(m)
      # This logic ensures that threads waiting on a machine will get a
      # machine with a checksum equal to their image over other threads.
      # This saves time when crosperf initially assigns the machines to
      # threads by minimizing the number of re-images.
      # TODO(asharif): If we centralize the thread-scheduler, we wont need
      # this code and can implement minimal reimaging code more cleanly.
      for m in candidates:
        idle_time = time.time() - m.released_time
        if idle_time > 15:
          # The release time gap is too large, so it is probably in the start
          # stage, we need to reset the released_time.
          m.released_time = time.time()
        elif idle_time > 8:
          return self._MarkAcquired(m)
    return None

  def GetAvailableMachines(self, label=None):
    """Return tracked machines, restricted to `label.remote` if given."""
    if not label:
      return self._machines
    return [m for m in self._machines if m.name in label.remote]

  def GetMachines(self, label=None):
    """Return all added machines, restricted to `label.remote` if given."""
    if not label:
      return self._all_machines
    return [m for m in self._all_machines if m.name in label.remote]

  def ReleaseMachine(self, machine):
    """Mark the tracked machine named like `machine` as no longer in use."""
    with self._lock:
      for m in self._machines:
        if machine.name == m.name:
          assert m.locked, "Tried to double-release %s" % m.name
          m.released_time = time.time()
          m.locked = False
          m.status = "Available"
          break

  def Cleanup(self):
    """Release the file locks of all tracked machines."""
    with self._lock:
      # File locks are only taken when a locks directory is configured
      # (see _TryToLockMachine), so there is nothing to release otherwise.
      if not self.locks_dir:
        return
      # Unlock all machines (via file lock)
      for m in self._machines:
        res = file_lock_machine.Machine(m.name, self.locks_dir).Unlock(True)

        if not res:
          self.logger.LogError("Could not unlock machine: '%s'."
                               % m.name)

  def __str__(self):
    with self._lock:
      lines = ["MachineManager Status:"]
      lines.extend(str(m) for m in self._machines)
      return "\n".join(lines)

  def AsString(self):
    """Return a table with one row per tracked machine."""
    with self._lock:
      stringify_fmt = "%-30s %-10s %-4s %-25s %-32s"
      header = stringify_fmt % ("Machine", "Thread", "Lock", "Status",
                                "Checksum")
      table = [header]
      for m in self._machines:
        if m.test_run:
          test_name = m.test_run.name
          test_status = m.test_run.timeline.GetLastEvent()
        else:
          test_name = ""
          test_status = ""

        try:
          machine_string = stringify_fmt % (m.name,
                                            test_name,
                                            m.locked,
                                            test_status,
                                            m.checksum)
        except ValueError:
          machine_string = ""
        table.append(machine_string)
      return "Machine Status:\n%s" % "\n".join(table)

  def GetAllCPUInfo(self, labels):
    """Get cpuinfo for labels, merge them if their cpuinfo are the same."""
    dic = {}
    for label in labels:
      for machine in self._all_machines:
        if machine.name in label.remote:
          # Only the first matching machine represents the label.
          dic.setdefault(machine.cpuinfo, []).append(label.name)
          break
    sections = []
    for key, v in dic.items():
      sections.append(" ".join(v))
      sections.append("\n-------------------\n")
      sections.append(key)
      sections.append("\n\n\n")
    return "".join(sections)
Ahmad Sharif0dcbc4b2012-02-02 16:37:18 -0800524
Ahmad Sharif0dcbc4b2012-02-02 16:37:18 -0800525
class MockCrosMachine(CrosMachine):
  """Mock cros machine class.

  Stands in for CrosMachine without contacting a device: the checksum is
  derived from the machine name and memory/CPU information comes from the
  canned strings below.
  """
  # pylint: disable=super-init-not-called

  MEMINFO_STRING = """MemTotal: 3990332 kB
MemFree: 2608396 kB
Buffers: 147168 kB
Cached: 811560 kB
SwapCached: 0 kB
Active: 503480 kB
Inactive: 628572 kB
Active(anon): 174532 kB
Inactive(anon): 88576 kB
Active(file): 328948 kB
Inactive(file): 539996 kB
Unevictable: 0 kB
Mlocked: 0 kB
SwapTotal: 5845212 kB
SwapFree: 5845212 kB
Dirty: 9384 kB
Writeback: 0 kB
AnonPages: 173408 kB
Mapped: 146268 kB
Shmem: 89676 kB
Slab: 188260 kB
SReclaimable: 169208 kB
SUnreclaim: 19052 kB
KernelStack: 2032 kB
PageTables: 7120 kB
NFS_Unstable: 0 kB
Bounce: 0 kB
WritebackTmp: 0 kB
CommitLimit: 7840376 kB
Committed_AS: 1082032 kB
VmallocTotal: 34359738367 kB
VmallocUsed: 364980 kB
VmallocChunk: 34359369407 kB
DirectMap4k: 45824 kB
DirectMap2M: 4096000 kB
"""

  CPUINFO_STRING = """processor: 0
vendor_id: GenuineIntel
cpu family: 6
model: 42
model name: Intel(R) Celeron(R) CPU 867 @ 1.30GHz
stepping: 7
microcode: 0x25
cpu MHz: 1300.000
cache size: 2048 KB
physical id: 0
siblings: 2
core id: 0
cpu cores: 2
apicid: 0
initial apicid: 0
fpu: yes
fpu_exception: yes
cpuid level: 13
wp: yes
flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx rdtscp lm constant_tsc arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc aperfmperf pni pclmulqdq dtes64 monitor ds_cpl vmx est tm2 ssse3 cx16 xtpr pdcm pcid sse4_1 sse4_2 x2apic popcnt tsc_deadline_timer xsave lahf_lm arat epb xsaveopt pln pts dts tpr_shadow vnmi flexpriority ept vpid
bogomips: 2594.17
clflush size: 64
cache_alignment: 64
address sizes: 36 bits physical, 48 bits virtual
power management:

processor: 1
vendor_id: GenuineIntel
cpu family: 6
model: 42
model name: Intel(R) Celeron(R) CPU 867 @ 1.30GHz
stepping: 7
microcode: 0x25
cpu MHz: 1300.000
cache size: 2048 KB
physical id: 0
siblings: 2
core id: 1
cpu cores: 2
apicid: 2
initial apicid: 2
fpu: yes
fpu_exception: yes
cpuid level: 13
wp: yes
flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx rdtscp lm constant_tsc arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc aperfmperf pni pclmulqdq dtes64 monitor ds_cpl vmx est tm2 ssse3 cx16 xtpr pdcm pcid sse4_1 sse4_2 x2apic popcnt tsc_deadline_timer xsave lahf_lm arat epb xsaveopt pln pts dts tpr_shadow vnmi flexpriority ept vpid
bogomips: 2594.17
clflush size: 64
cache_alignment: 64
address sizes: 36 bits physical, 48 bits virtual
power management:
"""

  def __init__(self, name, chromeos_root, log_level):
    self.name = name
    self.image = None
    self.checksum = None
    self.locked = False
    self.released_time = time.time()
    self.test_run = None
    self.chromeos_root = chromeos_root
    # In test, we assume "lumpy1", "lumpy2" are the same machine: digits are
    # stripped from the name before it is hashed.
    self.checksum_string = re.sub(r"\d", "", name)
    self.machine_checksum = self._GetMD5Checksum(self.checksum_string)
    self.log_level = log_level
    self.label = None
    self.machine_id = None
    self.ce = command_executer.GetCommandExecuter(log_level=self.log_level)
    # The base __init__ is not called, so fill in the attributes it would
    # have defined; MachineManager.GetAllCPUInfo reads `cpuinfo`.
    self._GetMemoryInfo()
    self._GetCPUInfo()

  def IsReachable(self):
    """A mock machine always counts as reachable."""
    return True

  def _GetMemoryInfo(self):
    """Use the canned meminfo instead of querying a device."""
    self.meminfo = self.MEMINFO_STRING
    self._ParseMemoryInfo()

  def _GetCPUInfo(self):
    """Use the canned cpuinfo instead of querying a device."""
    self.cpuinfo = self.CPUINFO_STRING
Ahmad Sharif4467f002012-12-20 12:09:49 -0800644
class MockMachineManager(MachineManager):
  """Mock machine manager class.

  Replaces every operation that would touch a device or a lock file with a
  trivial in-memory stand-in built on MockCrosMachine.
  """

  def __init__(self, chromeos_root, acquire_timeout,
               log_level):
    super(MockMachineManager, self).__init__(
        chromeos_root, acquire_timeout,
        log_level,
        file_lock_machine.Machine.LOCKS_DIR)

  def _TryToLockMachine(self, cros_machine):
    """Track the machine without taking any lock; its checksum becomes ""."""
    self._machines.append(cros_machine)
    cros_machine.checksum = ""

  def AddMachine(self, machine_name):
    """Add a MockCrosMachine named `machine_name`."""
    with self._lock:
      for m in self._all_machines:
        assert m.name != machine_name, "Tried to double-add %s" % machine_name
      cm = MockCrosMachine(machine_name, self.chromeos_root, self.log_level)
      assert cm.machine_checksum, ("Could not find checksum for machine %s" %
                                   machine_name)
      # In Original MachineManager, the test is 'if cm.machine_checksum:' - if
      # a machine is unreachable, then its machine_checksum is None.  Here we
      # cannot do this, because machine_checksum is always faked, so we
      # directly test cm.IsReachable, which is properly mocked.
      if cm.IsReachable():
        self._all_machines.append(cm)

  def GetChromeVersion(self, machine):
    """Return a fixed fake Chrome version string."""
    return "Mock Chrome Version R50"

  def AcquireMachine(self, label):
    """Lock and return the first unlocked machine, ignoring `label`."""
    for machine in self._all_machines:
      if not machine.locked:
        machine.locked = True
        return machine
    return None

  def ImageMachine(self, machine_name, label):
    """Pretend to image: return 0 when either argument is truthy."""
    # The arguments are only inspected so that they count as used; when both
    # are falsy the method falls through and returns None.
    if machine_name or label:
      return 0

  def ReleaseMachine(self, machine):
    """Mark `machine` as unlocked."""
    machine.locked = False

  def GetMachines(self, label=None):
    """Return every added machine, ignoring `label`."""
    return self._all_machines

  def GetAvailableMachines(self, label=None):
    """Return every added machine, ignoring `label`."""
    return self._all_machines

  def ForceSameImageToAllMachines(self, label):
    """Do nothing and report success."""
    return 0

  def ComputeCommonCheckSum(self, label):
    """Assign the fixed checksum 12345 to all machines and to `label`."""
    common_checksum = 12345
    for machine in self.GetMachines(label):
      machine.machine_checksum = common_checksum
    self.machine_checksum[label.name] = common_checksum