blob: 971b967c0a69933ead1978dd2816cbcaa204795f [file] [log] [blame]
Han Shen7a939a32015-09-16 11:08:09 -07001# Copyright 2011-2015 Google Inc. All Rights Reserved.
Ahmad Sharif0dcbc4b2012-02-02 16:37:18 -08002
Ahmad Sharif4467f002012-12-20 12:09:49 -08003"""The experiment runner module."""
Ahmad Sharif0dcbc4b2012-02-02 16:37:18 -08004import getpass
5import os
Caroline Tice68c1d612015-09-02 14:49:45 -07006import shutil
Ahmad Sharif0dcbc4b2012-02-02 16:37:18 -08007import time
Ahmad Sharif4467f002012-12-20 12:09:49 -08008
cmticee5bc63b2015-05-27 16:59:37 -07009import afe_lock_machine
Caroline Tice7057cf62015-12-10 12:09:40 -080010import test_flag
cmticee5bc63b2015-05-27 16:59:37 -070011
Yunlian Jiang0d1a9f32015-12-09 10:47:11 -080012from cros_utils import command_executer
13from cros_utils import logger
14from cros_utils.email_sender import EmailSender
15from cros_utils.file_utils import FileUtils
Ahmad Sharif0dcbc4b2012-02-02 16:37:18 -080016
Luis Lozanof81680c2013-03-15 14:44:13 -070017import config
Ahmad Sharif4467f002012-12-20 12:09:49 -080018from experiment_status import ExperimentStatus
Caroline Tice68c1d612015-09-02 14:49:45 -070019from results_cache import CacheConditions
20from results_cache import ResultsCache
Ahmad Sharif4467f002012-12-20 12:09:49 -080021from results_report import HTMLResultsReport
22from results_report import TextResultsReport
Caroline Ticeef4ca8a2015-08-25 12:53:38 -070023from results_report import JSONResultsReport
Han Shen7a939a32015-09-16 11:08:09 -070024from schedv2 import Schedv2
Ahmad Sharif4467f002012-12-20 12:09:49 -080025
Ahmad Sharif0dcbc4b2012-02-02 16:37:18 -080026
class ExperimentRunner(object):
    """Runs a crosperf experiment and reports on its results.

    A run consists of (optionally) locking the requested machines, starting
    the experiment, polling it until completion while logging status, and
    finally unlocking the machines.  Afterwards the text report is printed
    and, unless the run was interrupted, results are stored and emailed.
    """

    # Minimum number of seconds between two status reports in the log.
    STATUS_TIME_DELAY = 30
    # Number of seconds to sleep between two completion polls.
    THREAD_MONITOR_DELAY = 2

    def __init__(self, experiment, json_report, using_schedv2=False, log=None,
                 cmd_exec=None):
        """Set up the runner for a single experiment.

        Args:
          experiment: The experiment object to run.
          json_report: If true, a JSON report is also written when results
            are stored.
          using_schedv2: If true, a Schedv2 scheduler is created and handed
            to the experiment before it runs.
          log: Logger to use; defaults to a logger on experiment.log_dir.
          cmd_exec: Command executer to use; defaults to one built on the
            logger.
        """
        self._experiment = experiment
        self.l = log or logger.GetLogger(experiment.log_dir)
        self._ce = cmd_exec or command_executer.GetCommandExecuter(self.l)
        self._terminated = False
        self.json_report = json_report
        self.locked_machines = []
        if experiment.log_level != "verbose":
            # Per-instance override of the class default.
            self.STATUS_TIME_DELAY = 10

        # Setting this to True will use crosperf sched v2 (feature in
        # progress).
        self._using_schedv2 = using_schedv2

    def _GetMachineList(self):
        """Return a list of all requested machines.

        Every label-specific machine request must already be part of the
        global request list, so the global list (experiment.remote) is
        returned as-is after checking that invariant.

        Returns:
          The experiment's own `remote` list (not a copy).
        """
        machines = self._experiment.remote
        # All Label.remote is a sublist of experiment.remote.
        for label in self._experiment.labels:
            for remote in label.remote:
                assert remote in machines
        return machines

    def _UpdateMachineList(self, locked_machines):
        """Update machines lists to contain only locked machines.

        Go through all the lists of requested machines, both global and
        label-specific requests, and remove any machine that we were not
        able to lock.  The lists are filtered in place so that other holders
        of the same list objects observe the change.

        Args:
          locked_machines: A list of the machines we successfully locked.
        """
        # Do not call list.remove() while iterating over the same list: the
        # iterator skips the element following each removal.  Build the
        # filtered list first, then store it back through a slice.
        remote = self._experiment.remote
        remote[:] = [m for m in remote if m in locked_machines]

        for label in self._experiment.labels:
            label.remote[:] = [m for m in label.remote
                               if m in locked_machines]

    def _NewLockManager(self, machines, experiment):
        """Build an AFELockManager for `machines`.

        The chromeos_root of the experiment's first label is handed to the
        lock manager.

        Args:
          machines: The machines the lock manager should operate on.
          experiment: The experiment whose first label supplies chromeos_root.

        Returns:
          A new afe_lock_machine.AFELockManager instance.
        """
        return afe_lock_machine.AFELockManager(
            machines,
            "",
            experiment.labels[0].chromeos_root,
            None,
            log=self.l,
        )

    def _LockAllMachines(self, experiment):
        """Attempt to globally lock all of the machines requested for run.

        This method will use the AFE server to globally lock all of the
        machines requested for this crosperf run, to prevent any other
        crosperf runs from being able to update/use the machines while this
        experiment is running.  In test mode no lock manager is used and
        every requested machine is recorded as locked.

        Args:
          experiment: The experiment whose machines should be locked.

        Raises:
          RuntimeError: If not a single machine could be locked.
        """
        if test_flag.GetTestMode():
            self.locked_machines = self._GetMachineList()
            self._experiment.locked_machines = self.locked_machines
        else:
            lock_mgr = self._NewLockManager(self._GetMachineList(),
                                            experiment)
            for m in lock_mgr.machines:
                if not lock_mgr.MachineIsKnown(m):
                    lock_mgr.AddLocalMachine(m)
            machine_states = lock_mgr.GetMachineStates("lock")
            lock_mgr.CheckMachineLocks(machine_states, "lock")
            self.locked_machines = lock_mgr.UpdateMachines(True)
            self._experiment.locked_machines = self.locked_machines
            self._UpdateMachineList(self.locked_machines)
            self._experiment.machine_manager.RemoveNonLockedMachines(
                self.locked_machines)
            if not self.locked_machines:
                raise RuntimeError("Unable to lock any machines.")

    def _UnlockAllMachines(self, experiment):
        """Attempt to globally unlock all of the machines requested for run.

        The method will use the AFE server to globally unlock all of the
        machines that were locked for this crosperf run.  It does nothing
        when no machine is recorded as locked or when running in test mode.

        Args:
          experiment: The experiment whose machines should be unlocked.
        """
        if not self.locked_machines or test_flag.GetTestMode():
            return

        lock_mgr = self._NewLockManager(self.locked_machines, experiment)
        machine_states = lock_mgr.GetMachineStates("unlock")
        lock_mgr.CheckMachineLocks(machine_states, "unlock")
        lock_mgr.UpdateMachines(False)

    def _ClearCacheEntries(self, experiment):
        """Delete the on-disk cache directory of every benchmark run.

        Args:
          experiment: The experiment whose benchmark runs' cache directories
            should be removed.
        """
        for br in experiment.benchmark_runs:
            cache = ResultsCache()
            cache.Init(br.label.chromeos_image, br.label.chromeos_root,
                       br.benchmark.test_name, br.iteration, br.test_args,
                       br.profiler_args, br.machine_manager, br.machine,
                       br.label.board, br.cache_conditions, br._logger,
                       br.log_level, br.label, br.share_cache,
                       br.benchmark.suite, br.benchmark.show_all_results,
                       br.benchmark.run_local)
            cache_dir = cache._GetCacheDirForWrite()
            if os.path.exists(cache_dir):
                self.l.LogOutput("Removing cache dir: %s" % cache_dir)
                shutil.rmtree(cache_dir)

    def _Run(self, experiment):
        """Run the experiment and poll it until it completes.

        Machines are locked first (unless the experiment uses a locks
        directory) and are always unlocked again on the way out.  On
        Ctrl-C or SystemExit the experiment is terminated, the runner is
        marked as terminated and the exception is re-raised.

        Args:
          experiment: The experiment to run.
        """
        try:
            if not experiment.locks_dir:
                self._LockAllMachines(experiment)
            if self._using_schedv2:
                schedv2 = Schedv2(experiment)
                experiment.set_schedv2(schedv2)
            if CacheConditions.FALSE in experiment.cache_conditions:
                self._ClearCacheEntries(experiment)
            status = ExperimentStatus(experiment)
            experiment.Run()
            last_status_time = 0
            last_status_string = ""
            border = "=============================="
            verbose = experiment.log_level == "verbose"
            try:
                if not verbose:
                    self.l.LogStartDots()
                while not experiment.IsComplete():
                    if last_status_time + self.STATUS_TIME_DELAY < time.time():
                        last_status_time = time.time()
                        if verbose:
                            self.l.LogOutput(border)
                            self.l.LogOutput(status.GetProgressString())
                            self.l.LogOutput(status.GetStatusString())
                            self.l.LogOutput(border)
                        else:
                            # Only print the status when it changed;
                            # otherwise just extend the progress dots.
                            current_status_string = status.GetStatusString()
                            if current_status_string != last_status_string:
                                self.l.LogEndDots()
                                self.l.LogOutput(border)
                                self.l.LogOutput(current_status_string)
                                self.l.LogOutput(border)
                                last_status_string = current_status_string
                            else:
                                self.l.LogAppendDot()
                    time.sleep(self.THREAD_MONITOR_DELAY)
            except KeyboardInterrupt:
                self._terminated = True
                self.l.LogError("Ctrl-c pressed. Cleaning up...")
                experiment.Terminate()
                raise
            except SystemExit:
                self._terminated = True
                self.l.LogError("Unexpected exit. Cleaning up...")
                experiment.Terminate()
                raise
        finally:
            if not experiment.locks_dir:
                self._UnlockAllMachines(experiment)

    def _PrintTable(self, experiment):
        """Log the text results report of the experiment."""
        self.l.LogOutput(TextResultsReport(experiment).GetReport())

    def _GetEmailBody(self, experiment):
        """Return the HTML-wrapped text report used as email message body."""
        text_report = TextResultsReport(experiment, True).GetReport()
        text_report += ("\nResults are stored in %s.\n" %
                        experiment.results_directory)
        return "<pre style='font-size: 13px'>%s</pre>" % text_report

    def _Email(self, experiment):
        """Email the results report to the current user and email_to list.

        No mail is sent when the "no_email" config option is set, or when
        every benchmark run was a cache hit and no explicit recipients were
        requested.

        Args:
          experiment: The experiment whose results should be mailed.
        """
        # Only email by default if a new run was completed.
        send_mail = any(not benchmark_run.cache_hit
                        for benchmark_run in experiment.benchmark_runs)
        if ((not send_mail and not experiment.email_to)
                or config.GetConfig("no_email")):
            return

        label_names = [label.name for label in experiment.labels]
        subject = "%s: %s" % (experiment.name, " vs. ".join(label_names))

        text_report = self._GetEmailBody(experiment)
        html_report = HTMLResultsReport(experiment).GetReport()
        attachment = EmailSender.Attachment("report.html", html_report)
        email_to = [getpass.getuser()] + experiment.email_to
        EmailSender().SendEmail(email_to,
                                subject,
                                text_report,
                                attachments=[attachment],
                                msg_type="html")

    def _StoreResults(self, experiment):
        """Write the experiment file, reports and per-run results to disk.

        The results directory is removed and recreated first.  Nothing is
        stored when the run was terminated.

        Args:
          experiment: The experiment whose results should be stored.
        """
        if self._terminated:
            return
        results_directory = experiment.results_directory
        FileUtils().RmDir(results_directory)
        FileUtils().MkDirP(results_directory)
        self.l.LogOutput("Storing experiment file in %s." % results_directory)
        experiment_file_path = os.path.join(results_directory,
                                            "experiment.exp")
        FileUtils().WriteFile(experiment_file_path, experiment.experiment_file)

        self.l.LogOutput("Storing results report in %s." % results_directory)
        results_table_path = os.path.join(results_directory, "results.html")
        report = HTMLResultsReport(experiment).GetReport()
        if self.json_report:
            JSONResultsReport(experiment).GetReport(results_directory)
        FileUtils().WriteFile(results_table_path, report)

        self.l.LogOutput("Storing email message body in %s." %
                         results_directory)
        msg_file_path = os.path.join(results_directory, "msg_body.html")
        FileUtils().WriteFile(msg_file_path, self._GetEmailBody(experiment))

        self.l.LogOutput("Storing results of each benchmark run.")
        for benchmark_run in experiment.benchmark_runs:
            if benchmark_run.result:
                # Keep only alphanumeric characters for the directory name.
                # "".join() yields a string on both Python 2 and Python 3,
                # where filter() would return an iterator.
                benchmark_run_name = "".join(
                    ch for ch in benchmark_run.name if ch.isalnum())
                benchmark_run_path = os.path.join(results_directory,
                                                  benchmark_run_name)
                benchmark_run.result.CopyResultsTo(benchmark_run_path)
                benchmark_run.result.CleanUp(
                    benchmark_run.benchmark.rm_chroot_tmp)

    def Run(self):
        """Run the experiment, then print, store and email its results.

        The text report is printed even if the run raised; results are only
        stored and emailed when the run was not terminated.
        """
        try:
            self._Run(self._experiment)
        finally:
            # Always print the report at the end of the run.
            self._PrintTable(self._experiment)
            if not self._terminated:
                self._StoreResults(self._experiment)
                self._Email(self._experiment)
Ahmad Sharif0dcbc4b2012-02-02 16:37:18 -0800270
271
class MockExperimentRunner(ExperimentRunner):
    """ExperimentRunner variant for tests: each step only logs a message."""

    def __init__(self, experiment, json_report):
        """Initialise the base runner with default scheduler/logger options."""
        super(MockExperimentRunner, self).__init__(experiment, json_report)

    def _Run(self, experiment):
        """Log which experiment would have been run."""
        message = ("Would run the following experiment: '%s'." %
                   experiment.name)
        self.l.LogOutput(message)

    def _PrintTable(self, experiment):
        """Log that the results table would have been printed."""
        message = "Would print the experiment table."
        self.l.LogOutput(message)

    def _Email(self, experiment):
        """Log that the result email would have been sent."""
        message = "Would send result email."
        self.l.LogOutput(message)

    def _StoreResults(self, experiment):
        """Log that the results would have been stored."""
        message = "Would store the results."
        self.l.LogOutput(message)