blob: 5c86afe592dcf92ef292c90d815044061268cff8 [file] [log] [blame]
Ahmad Sharif4467f002012-12-20 12:09:49 -08001#!/usr/bin/python
Yunlian Jiang36f91ad2013-03-28 17:13:29 -07002
3# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
4# Use of this source code is governed by a BSD-style license that can be
5# found in the LICENSE file.
Ahmad Sharif4467f002012-12-20 12:09:49 -08006
Ahmad Shariff395c262012-10-09 17:48:09 -07007import hashlib
8import image_chromeos
cmtice517dc982015-06-12 12:22:32 -07009import file_lock_machine
Ahmad Shariff395c262012-10-09 17:48:09 -070010import math
11import os.path
Ahmad Sharif4467f002012-12-20 12:09:49 -080012import re
Ahmad Sharif0dcbc4b2012-02-02 16:37:18 -080013import sys
14import threading
15import time
Ahmad Sharif4467f002012-12-20 12:09:49 -080016
cmtice1505b6a2014-06-04 14:19:19 -070017
Ahmad Sharif0dcbc4b2012-02-02 16:37:18 -080018from utils import command_executer
19from utils import logger
cmtice5c09fc22015-04-22 09:25:53 -070020from utils import misc
Ahmad Shariff1d70cb2012-02-06 21:51:59 -080021from utils.file_utils import FileUtils
Ahmad Sharif0dcbc4b2012-02-02 16:37:18 -080022
# Path on the DUT of the file whose contents are read (via "cat") to learn
# the checksum of the OS image currently installed on the machine.
CHECKSUM_FILE = "/usr/local/osimage_checksum_file"
24
class BadChecksum(Exception):
    """Signals that the machines of one label disagree on their checksum."""
28
class BadChecksumString(Exception):
    """Signals that a label's machines disagree on their checksum string."""
Ahmad Sharif0dcbc4b2012-02-02 16:37:18 -080032
class MissingLocksDirectory(Exception):
    """Signals that the machine locks directory is missing or inaccessible."""
35
class CrosCommandError(Exception):
    """Signals that running a command on the DUT failed."""
38
class CrosMachine(object):
    """State and hardware fingerprint of one remote machine (DUT).

    Constructing an instance immediately probes the machine through the
    command executer (see SetUpChecksumInfo).  If the machine cannot be
    reached, `machine_checksum` stays None and the other probed attributes
    keep their None defaults.
    """

    def __init__(self, name, chromeos_root, log_level, cmd_exec=None):
        """Record the machine's identity and probe its characteristics.

        Args:
          name: Name of the remote machine; passed as `machine=` to the
            command executer.
          chromeos_root: ChromeOS checkout path passed to the executer.
          log_level: Logging level used when creating the default executer.
          cmd_exec: Optional command executer; when omitted one is created
            with command_executer.GetCommandExecuter.
        """
        self.name = name
        self.image = None
        # We relate a dut with a label if we reimage the dut using label or
        # we detect at the very beginning that the dut is running this label.
        self.label = None
        self.checksum = None
        self.locked = False
        self.released_time = time.time()
        self.test_run = None
        self.chromeos_root = chromeos_root
        self.log_level = log_level
        # Pre-declare everything SetUpChecksumInfo() may fill in, so that an
        # unreachable machine still exposes these attributes (as None)
        # instead of raising AttributeError on first access.
        self.meminfo = None
        self.cpuinfo = None
        self.phys_kbytes = None
        self.checksum_string = None
        self.machine_id = None
        self.machine_checksum = None
        self.machine_id_checksum = None
        self.ce = cmd_exec or command_executer.GetCommandExecuter(
            log_level=self.log_level)
        self.SetUpChecksumInfo()

    def SetUpChecksumInfo(self):
        """Probe the machine and compute its checksums.

        Sets `machine_checksum` to None and returns early when the machine
        is not reachable.  Otherwise fills in memory/CPU info, the checksum
        string, the machine id, and the MD5 digests of the latter two.
        """
        if not self.IsReachable():
            self.machine_checksum = None
            return
        self._GetMemoryInfo()
        self._GetCPUInfo()
        self._ComputeMachineChecksumString()
        self._GetMachineID()
        self.machine_checksum = self._GetMD5Checksum(self.checksum_string)
        self.machine_id_checksum = self._GetMD5Checksum(self.machine_id)

    def IsReachable(self):
        """Return True if running "ls" on the machine reports success."""
        command = "ls"
        ret = self.ce.CrosRunCommand(command,
                                     machine=self.name,
                                     chromeos_root=self.chromeos_root)
        return not ret

    def _ParseMemoryInfo(self):
        """Derive `phys_kbytes` (rounded physical memory) from `meminfo`.

        The value is taken from the "MemTotal" line; if no such line is
        present the first line is used, as before.
        """
        lines = self.meminfo.splitlines()
        # Tolerate unrelated output ahead of the MemTotal entry.
        memtotal_line = next(
            (line for line in lines if line.startswith("MemTotal")),
            lines[0])
        usable_kbytes = int(memtotal_line.split()[1])
        # This code is from src/third_party/test/files/client/bin/base_utils.py
        # usable_kbytes is system's usable DRAM in kbytes, as reported in
        # /proc/meminfo after Linux deducts some share for system table
        # overhead.  Undo the unknown actual deduction by rounding up to the
        # next small multiple of a big power-of-two, e.g. 12GB - 5.1% gets
        # rounded back up to 12GB.
        mindeduct = 0.005  # 0.5 percent
        maxdeduct = 0.095  # 9.5 percent
        # NOTE(review): the borrowed comments quoted a 1.5% lower bound and
        # a table of supported sizes (6GB..12GB in .5GB steps, 12GB..24GB in
        # 1GB steps, ...); the constants here are 0.5% and 9.5%, so confirm
        # that table before relying on it.  Finer granularity in physical
        # mem sizes would require a tighter spread between the two bounds.

        # Increase mem size by at least min deduction, without rounding.
        min_kbytes = int(usable_kbytes / (1.0 - mindeduct))
        # Increase mem size further by 2**n rounding, by 0..round_kbytes.
        round_kbytes = int(usable_kbytes / (1.0 - maxdeduct)) - min_kbytes
        # Find least binary roundup 2**n that covers worst-case round_kbytes,
        # so that round_kbytes <= mod2n < round_kbytes * 2.
        mod2n = 1 << int(math.ceil(math.log(round_kbytes, 2)))
        # Round min_kbytes up to the next multiple of mod2n.
        phys_kbytes = min_kbytes + mod2n - 1
        phys_kbytes -= phys_kbytes % mod2n  # clear low bits
        self.phys_kbytes = phys_kbytes

    def _GetMemoryInfo(self):
        """Fetch /proc/meminfo from the machine and parse it.

        Raises:
          AssertionError: if the command fails.  Raised explicitly (instead
            of via `assert`) so the check survives `python -O`.
        """
        # TODO(yunlian): when the machine is rebooting, it will not return
        # meminfo, and the return-code check does not catch it either.
        command = "cat /proc/meminfo"
        ret, self.meminfo, _ = self.ce.CrosRunCommand(
            command, return_output=True, machine=self.name,
            chromeos_root=self.chromeos_root)
        if ret != 0:
            raise AssertionError(
                "Could not get meminfo from machine: %s" % self.name)
        self._ParseMemoryInfo()

    # cpuinfo format is different across architectures;
    # need to find a better way to parse it.
    def _ParseCPUInfo(self, cpuinfo):
        """Placeholder parser; currently ignores `cpuinfo` and returns 0."""
        return 0

    def _GetCPUInfo(self):
        """Fetch /proc/cpuinfo from the machine into `self.cpuinfo`.

        Raises:
          AssertionError: if the command fails.
        """
        command = "cat /proc/cpuinfo"
        ret, self.cpuinfo, _ = self.ce.CrosRunCommand(
            command, return_output=True, machine=self.name,
            chromeos_root=self.chromeos_root)
        if ret != 0:
            raise AssertionError(
                "Could not get cpuinfo from machine: %s" % self.name)
        self._ParseCPUInfo(self.cpuinfo)

    def _ComputeMachineChecksumString(self):
        """Build `checksum_string` from cpuinfo and rounded memory size.

        cpuinfo lines mentioning clock speed or bogomips are dropped; the
        remaining lines are concatenated (without separators) and followed
        by a space and `phys_kbytes`.
        """
        exclude_lines_list = ["MHz", "BogoMIPS", "bogomips"]
        kept_lines = [line for line in self.cpuinfo.splitlines()
                      if not any(e in line for e in exclude_lines_list)]
        self.checksum_string = "".join(kept_lines) + " " + str(self.phys_kbytes)

    def _GetMD5Checksum(self, ss):
        """Return the hex MD5 digest of `ss`, or "" when `ss` is empty/None.

        Text input is encoded as UTF-8 first, because hashlib only accepts
        bytes-like objects on Python 3.
        """
        if not ss:
            return ""
        if not isinstance(ss, bytes):
            ss = ss.encode("utf-8")
        return hashlib.md5(ss).hexdigest()

    def _GetCommandOutputLines(self, command):
        """Run `command` on the machine and return its stdout as lines.

        The return code is not checked; missing output yields [].
        """
        _, output, _ = self.ce.CrosRunCommand(
            command, return_output=True, machine=self.name,
            chromeos_root=self.chromeos_root)
        return (output or "").splitlines()

    def _GetMachineID(self):
        """Set `machine_id` from the VPD log or, failing that, ifconfig.

        Tries, in order: the first "Product" line of
        `dump_vpd_log --full --stdout`, all "HWaddr" lines of `ifconfig`
        joined by "_", then all "ether" lines of `ifconfig` joined by "_".

        Raises:
          AssertionError: if none of the sources yields a matching line.
        """
        vpd_lines = self._GetCommandOutputLines("dump_vpd_log --full --stdout")
        product_lines = [line for line in vpd_lines if "Product" in line]
        if product_lines:
            self.machine_id = product_lines[0]
            return
        ifconfig_lines = self._GetCommandOutputLines("ifconfig")
        for marker in ("HWaddr", "ether"):
            matches = [line for line in ifconfig_lines if marker in line]
            if matches:
                self.machine_id = "_".join(matches)
                return
        raise AssertionError(
            "Could not get machine_id from machine: %s" % self.name)

    def __str__(self):
        """Return "name, image, checksum, locked, released_time"."""
        fields = (self.name, self.image, self.checksum, self.locked,
                  self.released_time)
        return ", ".join(str(field) for field in fields)
179
180
class MachineManager(object):
    """Lock, image and unlock machines locally for benchmark runs.

    This class contains methods and calls to lock, unlock and image
    machines and distribute machines to each benchmark run.  The assumption
    is that all of the machines for the experiment have been globally locked
    (using an AFE server) in the ExperimentRunner, but the machines still
    need to be locally locked/unlocked (allocated to benchmark runs) to
    prevent multiple benchmark runs within the same experiment from trying
    to use the same machine at the same time.
    """

    def __init__(self, chromeos_root, acquire_timeout, log_level, locks_dir,
                 cmd_exec=None, lgr=None):
        """Initialize an empty manager.

        Args:
          chromeos_root: Default ChromeOS checkout path for remote commands.
          acquire_timeout: Seconds AcquireMachine may keep waiting; it is
            decremented on the instance as time is spent waiting.
          log_level: Logging level; "verbose" changes what gets logged.
          locks_dir: Directory for file locks; falsy disables file locking.
          cmd_exec: Optional command executer (default: a new one from
            command_executer.GetCommandExecuter).
          lgr: Optional logger (default: logger.GetLogger()).

        Raises:
          MissingLocksDirectory: if `locks_dir` is set but not a directory.
        """
        self._lock = threading.RLock()
        self._all_machines = []
        self._machines = []
        self.image_lock = threading.Lock()
        self.num_reimages = 0
        self.chromeos_root = chromeos_root
        self.machine_checksum = {}
        self.machine_checksum_string = {}
        self.acquire_timeout = acquire_timeout
        self.log_level = log_level
        self.locks_dir = locks_dir
        self.ce = cmd_exec or command_executer.GetCommandExecuter(
            log_level=self.log_level)
        self.logger = lgr or logger.GetLogger()

        if self.locks_dir and not os.path.isdir(self.locks_dir):
            raise MissingLocksDirectory("Cannot access locks directory: %s"
                                        % self.locks_dir)

        self._initialized_machines = []

    def RemoveNonLockedMachines(self, locked_machines):
        """Drop every known machine whose name is not in `locked_machines`.

        Both internal lists are filtered in place; removing items from a
        list while iterating over it would skip elements.
        """
        self._all_machines[:] = [m for m in self._all_machines
                                 if m.name in locked_machines]
        self._machines[:] = [m for m in self._machines
                             if m.name in locked_machines]

    def GetChromeVersion(self, machine):
        """Get the version of Chrome running on the DUT.

        Raises:
          CrosCommandError: if the version command fails on the machine.
        """
        cmd = "/opt/google/chrome/chrome --version"
        ret, version, _ = self.ce.CrosRunCommand(
            cmd, return_output=True, machine=machine.name,
            chromeos_root=self.chromeos_root)
        if ret != 0:
            raise CrosCommandError("Couldn't get Chrome version from %s."
                                   % machine.name)
        return version.rstrip()

    def _LogImagingAttempt(self, image_chromeos_args):
        """Log an imaging attempt unless running at "verbose" log level."""
        if self.log_level != "verbose":
            self.logger.LogOutput("Pushing image onto machine.")
            self.logger.LogOutput("Running image_chromeos.DoImage with %s"
                                  % " ".join(image_chromeos_args))

    def ImageMachine(self, machine, label):
        """Image `machine` with the image described by `label`.

        Nothing is done when the machine's checksum already equals the
        label's (non-empty) checksum.  A failed first attempt is followed
        by a reboot, a 60 second wait and one retry.

        Returns:
          None if no imaging was needed, otherwise the (falsy) result of
          image_chromeos.DoImage.

        Raises:
          Exception: if imaging still fails after the retry.
        """
        checksum = label.checksum

        if checksum and (machine.checksum == checksum):
            return
        chromeos_root = label.chromeos_root or self.chromeos_root
        image_chromeos_args = [image_chromeos.__file__,
                               "--no_lock",
                               "--chromeos_root=%s" % chromeos_root,
                               "--image=%s" % label.chromeos_image,
                               "--image_args=%s" % label.image_args,
                               "--remote=%s" % machine.name,
                               "--logging_level=%s" % self.log_level]
        if label.board:
            image_chromeos_args.append("--board=%s" % label.board)

        save_ce_log_level = self.ce.log_level
        if self.log_level != "verbose":
            self.ce.log_level = "average"

        # The finally clause restores the executer's log level even when
        # imaging or the Chrome version query raises.
        try:
            # Currently can't image two machines at once.
            # So have to serialize on this lock.
            with self.image_lock:
                self._LogImagingAttempt(image_chromeos_args)
                retval = image_chromeos.DoImage(image_chromeos_args)
                if retval:
                    cmd = "reboot && exit"
                    if self.log_level != "verbose":
                        self.logger.LogOutput("reboot & exit.")
                    self.ce.CrosRunCommand(cmd, machine=machine.name,
                                           chromeos_root=self.chromeos_root)
                    time.sleep(60)
                    self._LogImagingAttempt(image_chromeos_args)
                    retval = image_chromeos.DoImage(image_chromeos_args)
                if retval:
                    raise Exception("Could not image machine: '%s'."
                                    % machine.name)
                self.num_reimages += 1
                machine.checksum = checksum
                machine.image = label.chromeos_image
                machine.label = label

            if not label.chrome_version:
                label.chrome_version = self.GetChromeVersion(machine)
        finally:
            self.ce.log_level = save_ce_log_level
        return retval

    def ComputeCommonCheckSum(self, label):
        """Store the machine checksum shared by all machines of `label`.

        Since this is used for cache lookups before the machines have been
        compared/verified, check here to make sure they all have the same
        checksum (otherwise the cache lookup may not be valid).

        Raises:
          BadChecksum: if two machines of the label have different checksums.
        """
        common_checksum = None
        for machine in self.GetMachines(label):
            # Make sure the machine's checksums are calculated.
            if not machine.machine_checksum:
                machine.SetUpChecksumInfo()
            cs = machine.machine_checksum
            # The first machine examined defines the 'common' checksum.
            if not common_checksum:
                common_checksum = cs
            # Every machine must match the 'common' checksum.
            if cs != common_checksum:
                raise BadChecksum("Machine checksums do not match!")
        self.machine_checksum[label.name] = common_checksum

    def ComputeCommonCheckSumString(self, label):
        """Store the checksum string of the first machine that has one.

        The assumption is that this function is only called AFTER
        ComputeCommonCheckSum, so there is no need to verify the machines
        are the same here.  If this is ever changed, this function should be
        modified to verify that all the machines for a given label are the
        same.
        """
        for machine in self.GetMachines(label):
            if machine.checksum_string:
                self.machine_checksum_string[label.name] = (
                    machine.checksum_string)
                break

    def _TryToLockMachine(self, cros_machine):
        """Add `cros_machine` to the available list, file-locking if needed.

        Does nothing if a machine of that name is already available.  When
        a locks directory is configured the machine must first be locked
        through file_lock_machine; without one it is always accepted.  On
        success the image checksum stored on the DUT is read, if present.
        """
        with self._lock:
            assert cros_machine, "Machine can't be None"
            if any(m.name == cros_machine.name for m in self._machines):
                return
            locked = True
            if self.locks_dir:
                locked = file_lock_machine.Machine(
                    cros_machine.name, self.locks_dir).Lock(True, sys.argv[0])
            if locked:
                self._machines.append(cros_machine)
                command = "cat %s" % CHECKSUM_FILE
                ret, out, _ = self.ce.CrosRunCommand(
                    command, return_output=True,
                    chromeos_root=self.chromeos_root,
                    machine=cros_machine.name)
                if ret == 0:
                    cros_machine.checksum = out.strip()
            elif self.locks_dir:
                self.logger.LogOutput("Couldn't lock: %s" % cros_machine.name)

    # This is called from single threaded mode.
    def AddMachine(self, machine_name):
        """Probe `machine_name` and remember it if a checksum was computed.

        A machine whose `machine_checksum` is empty after probing (e.g.
        because it was unreachable) is silently not added.
        """
        with self._lock:
            for m in self._all_machines:
                assert m.name != machine_name, (
                    "Tried to double-add %s" % machine_name)
            if self.log_level != "verbose":
                self.logger.LogOutput("Setting up remote access to %s"
                                      % machine_name)
                self.logger.LogOutput("Checking machine characteristics for %s"
                                      % machine_name)
            cm = CrosMachine(machine_name, self.chromeos_root, self.log_level)
            if cm.machine_checksum:
                self._all_machines.append(cm)

    def RemoveMachine(self, machine_name):
        """Remove a machine from the available list and release its file lock."""
        with self._lock:
            self._machines = [m for m in self._machines
                              if m.name != machine_name]
            if self.locks_dir:
                res = file_lock_machine.Machine(machine_name,
                                                self.locks_dir).Unlock(True)
                if not res:
                    self.logger.LogError("Could not unlock machine: '%s'."
                                         % machine_name)

    def ForceSameImageToAllMachines(self, label):
        """Image every machine of `label` and refresh its checksum info."""
        for m in self.GetMachines(label):
            self.ImageMachine(m, label)
            m.SetUpChecksumInfo()

    def _Claim(self, machine):
        """Mark `machine` as locked by the current thread and return it."""
        machine.locked = True
        machine.test_run = threading.current_thread()
        return machine

    def AcquireMachine(self, chromeos_image, label, throw=False):
        """Try to reserve one machine of `label` for the calling thread.

        Waits (sleeping in steps of at most 120 seconds, and consuming
        `self.acquire_timeout`) until at least one machine of the label is
        available, then picks, in order of preference: an unlocked machine
        already carrying the label's image checksum, an unlocked machine
        with no image checksum, or an unlocked machine that was released
        between 8 and 15 seconds ago.

        Args:
          chromeos_image: Unused here.
          label: Label whose machines are eligible.
          throw: Unused here.

        Returns:
          The reserved machine, or None if none qualifies right now.
        """
        image_checksum = label.checksum
        machines = self.GetMachines(label)
        check_interval_time = 120
        with self._lock:
            # Lazily external lock machines
            while self.acquire_timeout >= 0:
                for m in machines:
                    new_machine = m not in self._all_machines
                    self._TryToLockMachine(m)
                    if new_machine:
                        m.released_time = time.time()
                if self.GetAvailableMachines(label):
                    break
                sleep_time = max(1, min(self.acquire_timeout,
                                        check_interval_time))
                time.sleep(sleep_time)
                self.acquire_timeout -= sleep_time

            if self.acquire_timeout < 0:
                machine_names = [machine.name for machine in machines]
                self.logger.LogFatal("Could not acquire any of the "
                                     "following machines: '%s'"
                                     % ", ".join(machine_names))

            # Nothing below changes which machines are available or locked
            # until one is claimed, so the candidates are computed once.
            candidates = [machine
                          for machine in self.GetAvailableMachines(label)
                          if not machine.locked]
            for m in candidates:
                if image_checksum and (m.checksum == image_checksum):
                    return self._Claim(m)
            for m in candidates:
                if not m.checksum:
                    return self._Claim(m)
            # This logic ensures that threads waiting on a machine will get
            # a machine with a checksum equal to their image over other
            # threads.  This saves time when crosperf initially assigns the
            # machines to threads by minimizing the number of re-images.
            # TODO(asharif): If we centralize the thread-scheduler, we wont
            # need this code and can implement minimal reimaging code more
            # cleanly.
            for m in candidates:
                idle_time = time.time() - m.released_time
                if idle_time > 15:
                    # The release time gap is too large, so it is probably
                    # in the start stage; reset the released_time.
                    m.released_time = time.time()
                elif idle_time > 8:
                    return self._Claim(m)
        return None

    def GetAvailableMachines(self, label=None):
        """Return available machines, restricted to `label.remote` if given."""
        if not label:
            return self._machines
        return [m for m in self._machines if m.name in label.remote]

    def GetMachines(self, label=None):
        """Return all known machines, restricted to `label.remote` if given."""
        if not label:
            return self._all_machines
        return [m for m in self._all_machines if m.name in label.remote]

    def ReleaseMachine(self, machine):
        """Mark the available machine named like `machine` as unlocked."""
        with self._lock:
            for m in self._machines:
                if machine.name == m.name:
                    assert m.locked, "Tried to double-release %s" % m.name
                    m.released_time = time.time()
                    m.locked = False
                    m.status = "Available"
                    break

    def Cleanup(self):
        """Release the file locks of all available machines.

        Does nothing when no locks directory is configured, mirroring
        _TryToLockMachine and RemoveMachine, which only touch file locks
        in that case.
        """
        with self._lock:
            if not self.locks_dir:
                return
            # Unlock all machines (via file lock)
            for m in self._machines:
                res = file_lock_machine.Machine(m.name,
                                                self.locks_dir).Unlock(True)
                if not res:
                    self.logger.LogError("Could not unlock machine: '%s'."
                                         % m.name)

    def __str__(self):
        """Return a header line followed by one line per available machine."""
        with self._lock:
            lines = ["MachineManager Status:"]
            lines.extend(str(m) for m in self._machines)
            return "\n".join(lines)

    def AsString(self):
        """Return a table of available machines, their thread and status."""
        with self._lock:
            stringify_fmt = "%-30s %-10s %-4s %-25s %-32s"
            header = stringify_fmt % ("Machine", "Thread", "Lock", "Status",
                                      "Checksum")
            table = [header]
            for m in self._machines:
                if m.test_run:
                    test_name = m.test_run.name
                    test_status = m.test_run.timeline.GetLastEvent()
                else:
                    test_name = ""
                    test_status = ""

                # Best effort: a row that cannot be formatted is left blank.
                try:
                    machine_string = stringify_fmt % (m.name,
                                                      test_name,
                                                      m.locked,
                                                      test_status,
                                                      m.checksum)
                except Exception:
                    machine_string = ""
                table.append(machine_string)
            return "Machine Status:\n%s" % "\n".join(table)

    def GetAllCPUInfo(self, labels):
        """Get cpuinfo for labels, merge them if their cpuinfo are the same.

        Only the first known machine of each label is consulted.
        """
        labels_by_cpuinfo = {}
        for label in labels:
            for machine in self._all_machines:
                if machine.name in label.remote:
                    labels_by_cpuinfo.setdefault(machine.cpuinfo,
                                                 []).append(label.name)
                    break
        return "".join(
            "%s\n-------------------\n%s\n\n\n" % (" ".join(names), cpuinfo)
            for cpuinfo, names in labels_by_cpuinfo.items())
Ahmad Sharif0dcbc4b2012-02-02 16:37:18 -0800521
Ahmad Sharif0dcbc4b2012-02-02 16:37:18 -0800522
class MockCrosMachine(CrosMachine):
    """CrosMachine stand-in for tests that never contacts a real machine.

    The constructor deliberately does not call CrosMachine.__init__ (which
    would probe the machine); it sets the bookkeeping attributes itself and
    fakes the checksum from the machine name.
    """

    def __init__(self, name, chromeos_root, log_level):
        """Set up the fake machine.

        Args:
          name: Machine name; its digits are stripped to form the checksum
            string.
          chromeos_root: Stored as-is.
          log_level: Stored as-is.
        """
        self.name = name
        self.image = None
        self.checksum = None
        self.locked = False
        self.released_time = time.time()
        self.test_run = None
        self.chromeos_root = chromeos_root
        # In test, we assume "lumpy1", "lumpy2" are the same machine, so the
        # digits are removed before hashing.  The pattern is a raw string:
        # "\d" in a plain literal is an invalid escape sequence.
        self.checksum_string = re.sub(r"\d", "", name)
        self.machine_checksum = self._GetMD5Checksum(self.checksum_string)
        self.log_level = log_level
        self.label = None

    def IsReachable(self):
        """Always report the fake machine as reachable."""
        return True
540
Ahmad Sharif4467f002012-12-20 12:09:49 -0800541
class MockMachineManager(MachineManager):
    """MachineManager stand-in for tests, backed by MockCrosMachine."""

    def __init__(self, chromeos_root, acquire_timeout, log_level,
                 dummy_locks_dir):
        """Initialize the base manager.

        `dummy_locks_dir` is accepted for signature compatibility but
        ignored; file_lock_machine.Machine.LOCKS_DIR is passed to the base
        class instead.
        """
        super(MockMachineManager, self).__init__(
            chromeos_root, acquire_timeout, log_level,
            file_lock_machine.Machine.LOCKS_DIR)

    def _TryToLockMachine(self, cros_machine):
        """Accept the machine unconditionally and blank its image checksum."""
        self._machines.append(cros_machine)
        cros_machine.checksum = ""

    def AddMachine(self, machine_name):
        """Create a MockCrosMachine for `machine_name` and remember it."""
        with self._lock:
            for m in self._all_machines:
                assert m.name != machine_name, (
                    "Tried to double-add %s" % machine_name)
            cm = MockCrosMachine(machine_name, self.chromeos_root,
                                 self.log_level)
            assert cm.machine_checksum, (
                "Could not find checksum for machine %s" % machine_name)
            # In Original MachineManager, the test is
            # 'if cm.machine_checksum:' - if a machine is unreachable, then
            # its machine_checksum is None.  Here we cannot do this, because
            # machine_checksum is always faked, so we directly test
            # cm.IsReachable, which is properly mocked.
            if cm.IsReachable():
                self._all_machines.append(cm)

    def AcquireMachine(self, chromeos_image, label, throw=False):
        """Lock and return the first unlocked machine, or None."""
        for machine in self._all_machines:
            if not machine.locked:
                machine.locked = True
                return machine
        return None

    def ImageMachine(self, machine_name, label):
        """Pretend imaging succeeded."""
        return 0

    def ReleaseMachine(self, machine):
        """Mark `machine` as unlocked."""
        machine.locked = False

    # `label` defaults to None in the two getters below so that they accept
    # the same calls as the MachineManager methods they override.
    def GetMachines(self, label=None):
        """Return all machines; `label` is ignored."""
        return self._all_machines

    def GetAvailableMachines(self, label=None):
        """Return all machines; `label` is ignored."""
        return self._all_machines