blob: caf2569c4827f913c4abc22a96f9cd829b84a039 [file] [log] [blame]
Yunlian Jiang00cc30e2013-03-28 13:23:57 -07001# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
2# Use of this source code is governed by a BSD-style license that can be
3# found in the LICENSE file.
Ahmad Sharif0dcbc4b2012-02-02 16:37:18 -08004
Ahmad Sharif4467f002012-12-20 12:09:49 -08005"""The experiment setting module."""
6
Yunlian Jiang742ed2c2015-12-10 10:05:59 -08007from __future__ import print_function
8
Ahmad Sharif0dcbc4b2012-02-02 16:37:18 -08009import os
10import time
Ahmad Sharif4467f002012-12-20 12:09:49 -080011
cmticee5bc63b2015-05-27 16:59:37 -070012import afe_lock_machine
Han Shenba649282015-08-05 17:19:55 -070013from threading import Lock
cmticee5bc63b2015-05-27 16:59:37 -070014
Yunlian Jiang0d1a9f32015-12-09 10:47:11 -080015from cros_utils import logger
16from cros_utils import misc
Ahmad Sharif4467f002012-12-20 12:09:49 -080017
Han Shene0662972015-09-18 16:53:34 -070018import benchmark_run
Han Shen738e6de2015-12-07 13:22:25 -080019from machine_manager import BadChecksum
Ahmad Sharif0dcbc4b2012-02-02 16:37:18 -080020from machine_manager import MachineManager
Ahmad Sharif4467f002012-12-20 12:09:49 -080021from machine_manager import MockMachineManager
Ahmad Sharif4467f002012-12-20 12:09:49 -080022import test_flag
Ahmad Sharif0dcbc4b2012-02-02 16:37:18 -080023
class Experiment(object):
  """Class representing an Experiment to be run.

  An experiment is the cross product of labels, benchmarks and iterations.
  Each combination becomes one benchmark run.  The runs are executed either
  by an external scheduler installed with set_schedv2() or, when no
  scheduler is installed, by starting every benchmark run directly and
  polling them through IsComplete().
  """

  def __init__(self, name, remote, working_directory,
               chromeos_root, cache_conditions, labels, benchmarks,
               experiment_file, email_to, acquire_timeout, log_dir,
               log_level, share_cache, results_directory, locks_directory):
    """Set up the machine manager and generate all benchmark runs.

    Args:
      name: Experiment name; also used to derive the default results
        directory name.
      remote: Machines to run on.  Replaced by the names of the machines
        the machine manager actually accepted.
      working_directory: Parent directory of the default results directory.
      chromeos_root: ChromeOS root handed to the machine manager.  When
        empty, the first non-empty label.chromeos_root is used instead.
      cache_conditions: Passed through to every benchmark run.
      labels: Label objects; each label.remote is narrowed to the machines
        kept in self.remote.
      benchmarks: Benchmark objects; benchmark.iterations determines how
        many runs are generated per label.
      experiment_file: Stored as-is on the instance.
      email_to: Stored as-is on the instance.
      acquire_timeout: Passed through to the machine manager.
      log_dir: Directory handed to the loggers.
      log_level: Passed to the machine manager and every benchmark run.
      share_cache: Passed through to every benchmark run.
      results_directory: Where results go; when empty it defaults to
        "<working_directory>/<name>_results".
      locks_directory: If not blank the file locking mechanism is used,
        otherwise the AFE server locking mechanism is used.

    Raises:
      RuntimeError: If no remotes, benchmarks or labels are given, if no
        chromeos_root can be determined, or if no machine is available.
    """
    self.name = name
    self.working_directory = working_directory
    self.remote = remote
    self.chromeos_root = chromeos_root
    self.cache_conditions = cache_conditions
    self.experiment_file = experiment_file
    self.email_to = email_to
    if results_directory:
      self.results_directory = misc.CanonicalizePath(results_directory)
    else:
      self.results_directory = os.path.join(self.working_directory,
                                            self.name + "_results")
    self.log_dir = log_dir
    self.log_level = log_level
    self.labels = labels
    self.benchmarks = benchmarks
    self.num_complete = 0
    self.num_run_complete = 0
    self.share_cache = share_cache
    # If locks_directory (self.locks_dir) is not blank, we will use the file
    # locking mechanism; if it is blank then we will use the AFE server
    # locking mechanism.
    self.locks_dir = locks_directory
    self.locked_machines = []
    # Populated by Run() when no scheduler is installed.  Defined here so
    # that IsComplete() is safe to call before Run().
    self.active_threads = []

    if not remote:
      raise RuntimeError("No remote hosts specified")
    if not self.benchmarks:
      raise RuntimeError("No benchmarks specified")
    if not self.labels:
      raise RuntimeError("No labels specified")

    # We need one chromeos_root to run the benchmarks in, but it doesn't
    # matter where it is, unless the ABIs are different.
    if not chromeos_root:
      for label in self.labels:
        if label.chromeos_root:
          chromeos_root = label.chromeos_root
          break
    if not chromeos_root:
      raise RuntimeError("No chromeos_root given and could not determine "
                         "one from the image path.")

    if test_flag.GetTestMode():
      self.machine_manager = MockMachineManager(chromeos_root, acquire_timeout,
                                                log_level, locks_directory)
    else:
      self.machine_manager = MachineManager(chromeos_root, acquire_timeout,
                                            log_level, locks_directory)
    self.l = logger.GetLogger(log_dir)

    for machine in self.remote:
      # machine_manager.AddMachine only adds reachable machines.
      self.machine_manager.AddMachine(machine)
    # Now machine_manager._all_machines contains a list of reachable
    # machines. This is a subset of self.remote. We make both lists the same.
    self.remote = [m.name for m in self.machine_manager._all_machines]
    if not self.remote:
      raise RuntimeError("No machine available for running experiment.")

    for label in labels:
      # We filter out label remotes that are not reachable (not in
      # self.remote). So each label.remote is a sublist of experiment.remote.
      # A real list is built (rather than filter()) so the result can be
      # iterated more than once on every Python version.
      label.remote = [m for m in label.remote if m in self.remote]
      try:
        self.machine_manager.ComputeCommonCheckSum(label)
      except BadChecksum:
        # Force same image on all machines, then we do checksum again. No
        # bailout if checksums still do not match.
        self.machine_manager.ForceSameImageToAllMachines(label)
        self.machine_manager.ComputeCommonCheckSum(label)

      self.machine_manager.ComputeCommonCheckSumString(label)

    self.start_time = None
    self.benchmark_runs = self._GenerateBenchmarkRuns()

    self._schedv2 = None
    self._internal_counter_lock = Lock()

  def set_schedv2(self, schedv2):
    """Install the scheduler object that Run/Terminate/IsComplete defer to."""
    self._schedv2 = schedv2

  def schedv2(self):
    """Return the installed scheduler, or None if there is none."""
    return self._schedv2

  def _GenerateBenchmarkRuns(self):
    """Generate benchmark runs from labels and benchmark definitions.

    Returns:
      A list with one BenchmarkRun per (label, benchmark, iteration), where
      iterations are numbered from 1 to benchmark.iterations inclusive.
    """
    benchmark_runs = []
    for label in self.labels:
      for benchmark in self.benchmarks:
        for iteration in range(1, benchmark.iterations + 1):
          benchmark_run_name = "%s: %s (%s)" % (label.name, benchmark.name,
                                                iteration)
          full_name = "%s_%s_%s" % (label.name, benchmark.name, iteration)
          # Every run gets its own logger named after the run.
          logger_to_use = logger.Logger(self.log_dir,
                                        "run.%s" % (full_name),
                                        True)
          benchmark_runs.append(benchmark_run.BenchmarkRun(
              benchmark_run_name,
              benchmark,
              label,
              iteration,
              self.cache_conditions,
              self.machine_manager,
              logger_to_use,
              self.log_level,
              self.share_cache))

    return benchmark_runs

  def Build(self):
    """Do nothing; placeholder for a build step."""
    pass

  def Terminate(self):
    """Stop the experiment.

    Delegates to the scheduler when one is installed; otherwise terminates
    every benchmark run that is still alive.
    """
    if self._schedv2 is not None:
      self._schedv2.terminate()
      return
    for run in self.benchmark_runs:
      if run.isAlive():
        self.l.LogError("Terminating run: '%s'." % run.name)
        run.Terminate()

  def IsComplete(self):
    """Report whether the experiment has finished, reaping finished runs.

    With a scheduler installed the answer comes from the scheduler.
    Otherwise every run in self.active_threads is polled once; finished runs
    are counted in num_complete (and in num_run_complete when they were not
    cache hits) and dropped from the list.

    Returns:
      True if there was nothing left to wait for when called.  False if any
      run was still being tracked at entry, even if this call reaped the
      last one; the following call then returns True.
    """
    if self._schedv2 is not None:
      return self._schedv2.is_complete()
    if not self.active_threads:
      return True

    # Build the survivor list separately: removing entries from the list
    # being iterated would skip the element after each removed one.
    still_running = []
    for run in self.active_threads:
      if run.isAlive():
        run.join(0)
      if run.isAlive():
        still_running.append(run)
        continue
      self.num_complete += 1
      if not run.cache_hit:
        self.num_run_complete += 1
    self.active_threads[:] = still_running
    return False

  def BenchmarkRunFinished(self, br):
    """Update internal counters after br finishes.

    Note this is only used by schedv2 and is called by multiple threads.
    Never throw any exception here.

    Args:
      br: The finished benchmark run; only br.cache_hit is read.
    """
    assert self._schedv2 is not None
    with self._internal_counter_lock:
      self.num_complete += 1
      if not br.cache_hit:
        self.num_run_complete += 1

  def Run(self):
    """Record the start time and start the experiment.

    Delegates to the scheduler when one is installed; otherwise starts every
    benchmark run and tracks it in self.active_threads.
    """
    self.start_time = time.time()
    if self._schedv2 is not None:
      self._schedv2.run_sched()
      return
    self.active_threads = []
    for run in self.benchmark_runs:
      # Set threads to daemon so program exits when ctrl-c is pressed.
      run.daemon = True
      run.start()
      self.active_threads.append(run)

  def SetCacheConditions(self, cache_conditions):
    """Forward cache_conditions to every generated benchmark run."""
    for run in self.benchmark_runs:
      run.SetCacheConditions(cache_conditions)

  def Cleanup(self):
    """Make sure all machines are unlocked."""
    if self.locks_dir:
      # We are using the file locks mechanism, so call machine_manager.Cleanup
      # to unlock everything.
      self.machine_manager.Cleanup()
      return

    if test_flag.GetTestMode():
      return

    all_machines = self.locked_machines
    if not all_machines:
      return

    # If we locked any machines earlier, make sure we unlock them now.
    lock_mgr = afe_lock_machine.AFELockManager(all_machines, "",
                                               self.labels[0].chromeos_root,
                                               None)
    machine_states = lock_mgr.GetMachineStates("unlock")
    # items() rather than iteritems() so this works on Python 2 and 3.
    for machine, state in machine_states.items():
      if state["locked"]:
        lock_mgr.UpdateLockInAFE(False, machine)