Han Shen | 7a939a3 | 2015-09-16 11:08:09 -0700 | [diff] [blame] | 1 | # Copyright 2011-2015 Google Inc. All Rights Reserved. |
Ahmad Sharif | 0dcbc4b | 2012-02-02 16:37:18 -0800 | [diff] [blame] | 2 | |
Ahmad Sharif | 4467f00 | 2012-12-20 12:09:49 -0800 | [diff] [blame] | 3 | """The experiment runner module.""" |
Ahmad Sharif | 0dcbc4b | 2012-02-02 16:37:18 -0800 | [diff] [blame] | 4 | import getpass |
| 5 | import os |
Caroline Tice | 68c1d61 | 2015-09-02 14:49:45 -0700 | [diff] [blame] | 6 | import shutil |
Ahmad Sharif | 0dcbc4b | 2012-02-02 16:37:18 -0800 | [diff] [blame] | 7 | import time |
Ahmad Sharif | 4467f00 | 2012-12-20 12:09:49 -0800 | [diff] [blame] | 8 | |
cmtice | e5bc63b | 2015-05-27 16:59:37 -0700 | [diff] [blame] | 9 | import afe_lock_machine |
Caroline Tice | 7057cf6 | 2015-12-10 12:09:40 -0800 | [diff] [blame^] | 10 | import test_flag |
cmtice | e5bc63b | 2015-05-27 16:59:37 -0700 | [diff] [blame] | 11 | |
Yunlian Jiang | 0d1a9f3 | 2015-12-09 10:47:11 -0800 | [diff] [blame] | 12 | from cros_utils import command_executer |
| 13 | from cros_utils import logger |
| 14 | from cros_utils.email_sender import EmailSender |
| 15 | from cros_utils.file_utils import FileUtils |
Ahmad Sharif | 0dcbc4b | 2012-02-02 16:37:18 -0800 | [diff] [blame] | 16 | |
Luis Lozano | f81680c | 2013-03-15 14:44:13 -0700 | [diff] [blame] | 17 | import config |
Ahmad Sharif | 4467f00 | 2012-12-20 12:09:49 -0800 | [diff] [blame] | 18 | from experiment_status import ExperimentStatus |
Caroline Tice | 68c1d61 | 2015-09-02 14:49:45 -0700 | [diff] [blame] | 19 | from results_cache import CacheConditions |
| 20 | from results_cache import ResultsCache |
Ahmad Sharif | 4467f00 | 2012-12-20 12:09:49 -0800 | [diff] [blame] | 21 | from results_report import HTMLResultsReport |
| 22 | from results_report import TextResultsReport |
Caroline Tice | ef4ca8a | 2015-08-25 12:53:38 -0700 | [diff] [blame] | 23 | from results_report import JSONResultsReport |
Han Shen | 7a939a3 | 2015-09-16 11:08:09 -0700 | [diff] [blame] | 24 | from schedv2 import Schedv2 |
Ahmad Sharif | 4467f00 | 2012-12-20 12:09:49 -0800 | [diff] [blame] | 25 | |
Ahmad Sharif | 0dcbc4b | 2012-02-02 16:37:18 -0800 | [diff] [blame] | 26 | |
class ExperimentRunner(object):
    """Drives a single experiment from machine locking to result reporting.

    The runner optionally locks the requested machines, starts the
    experiment, periodically logs its status until it completes, unlocks the
    machines again and finally prints, stores and e-mails the reports.
    """

    # Seconds between two status reports; lowered to 10 in __init__ for
    # non-verbose log levels.
    STATUS_TIME_DELAY = 30
    # Seconds to sleep between two polls of experiment.IsComplete().
    THREAD_MONITOR_DELAY = 2

    def __init__(self, experiment, json_report, using_schedv2=False, log=None,
                 cmd_exec=None):
        """Initialize the runner.

        Args:
          experiment: The experiment object to run.
          json_report: If true, a JSON report is generated when results are
            stored.
          using_schedv2: If true, use crosperf sched v2 (feature in
            progress).
          log: Logger to use; defaults to a logger created from
            experiment.log_dir.
          cmd_exec: Command executer to use; defaults to one created from
            the logger.
        """
        self._experiment = experiment
        self.l = log or logger.GetLogger(experiment.log_dir)
        self._ce = cmd_exec or command_executer.GetCommandExecuter(self.l)
        self._terminated = False
        self.json_report = json_report
        self.locked_machines = []
        if experiment.log_level != "verbose":
            # Instance-level override of the class default.
            self.STATUS_TIME_DELAY = 10

        # Setting this to True will use crosperf sched v2 (feature in
        # progress).
        self._using_schedv2 = using_schedv2

    def _GetMachineList(self):
        """Return a list of all requested machines.

        Every label-specific remote must already be part of the global
        experiment.remote list, so that list is the complete set of
        requested machines.

        Returns:
          The experiment.remote list itself (not a copy).
        """
        machines = self._experiment.remote
        # All Label.remote is a sublist of experiment.remote.
        for label in self._experiment.labels:
            for remote in label.remote:
                assert remote in machines, (
                    "Label remote %r is not in experiment.remote" % (remote,))
        return machines

    def _UpdateMachineList(self, locked_machines):
        """Update machines lists to contain only locked machines.

        Go through all the lists of requested machines, both global and
        label-specific requests, and remove any machine that we were not
        able to lock.

        Args:
          locked_machines: A list of the machines we successfully locked.
        """
        # Filter with a slice assignment instead of calling remove() while
        # iterating: removing during iteration skips the element that
        # follows each removed one.  The slice assignment also keeps the
        # identity of each list, so other holders of the same list object
        # see the update.
        remote = self._experiment.remote
        remote[:] = [m for m in remote if m in locked_machines]

        for label in self._experiment.labels:
            label.remote[:] = [m for m in label.remote
                               if m in locked_machines]

    def _LockAllMachines(self, experiment):
        """Attempt to globally lock all of the machines requested for run.

        This method will use the AFE server to globally lock all of the
        machines requested for this crosperf run, to prevent any other
        crosperf runs from being able to update/use the machines while this
        experiment is running.  In test mode nothing is locked and every
        requested machine is simply recorded as locked.

        Args:
          experiment: The experiment whose machines should be locked.

        Raises:
          RuntimeError: If no machine could be locked.
        """
        if test_flag.GetTestMode():
            self.locked_machines = self._GetMachineList()
            self._experiment.locked_machines = self.locked_machines
            return

        lock_mgr = afe_lock_machine.AFELockManager(
            self._GetMachineList(),
            "",
            experiment.labels[0].chromeos_root,
            None,
            log=self.l,
        )
        for m in lock_mgr.machines:
            if not lock_mgr.MachineIsKnown(m):
                lock_mgr.AddLocalMachine(m)
        machine_states = lock_mgr.GetMachineStates("lock")
        lock_mgr.CheckMachineLocks(machine_states, "lock")
        self.locked_machines = lock_mgr.UpdateMachines(True)
        self._experiment.locked_machines = self.locked_machines
        self._UpdateMachineList(self.locked_machines)
        self._experiment.machine_manager.RemoveNonLockedMachines(
            self.locked_machines)
        if not self.locked_machines:
            raise RuntimeError("Unable to lock any machines.")

    def _UnlockAllMachines(self, experiment):
        """Attempt to globally unlock all of the machines requested for run.

        The method will use the AFE server to globally unlock all of the
        machines locked for this crosperf run.  It does nothing when no
        machine was locked or when running in test mode.

        Args:
          experiment: The experiment whose machines should be unlocked.
        """
        if not self.locked_machines or test_flag.GetTestMode():
            return

        lock_mgr = afe_lock_machine.AFELockManager(
            self.locked_machines,
            "",
            experiment.labels[0].chromeos_root,
            None,
            log=self.l,
        )
        machine_states = lock_mgr.GetMachineStates("unlock")
        lock_mgr.CheckMachineLocks(machine_states, "unlock")
        lock_mgr.UpdateMachines(False)

    def _ClearCacheEntries(self, experiment):
        """Delete the results-cache directory of every benchmark run.

        Args:
          experiment: The experiment whose benchmark runs are inspected.
        """
        for br in experiment.benchmark_runs:
            cache = ResultsCache()
            cache.Init(br.label.chromeos_image, br.label.chromeos_root,
                       br.benchmark.test_name, br.iteration, br.test_args,
                       br.profiler_args, br.machine_manager, br.machine,
                       br.label.board, br.cache_conditions, br._logger,
                       br.log_level, br.label, br.share_cache,
                       br.benchmark.suite, br.benchmark.show_all_results,
                       br.benchmark.run_local)
            cache_dir = cache._GetCacheDirForWrite()
            if os.path.exists(cache_dir):
                self.l.LogOutput("Removing cache dir: %s" % cache_dir)
                shutil.rmtree(cache_dir)

    def _Run(self, experiment):
        """Run the experiment and log its status until it completes.

        Machines are locked first unless experiment.locks_dir is set, and
        are unlocked again in all cases (including errors and interrupts).
        On Ctrl-C or SystemExit the experiment is terminated, the runner is
        marked as terminated and the exception is re-raised.

        Args:
          experiment: The experiment to run.
        """
        try:
            if not experiment.locks_dir:
                self._LockAllMachines(experiment)
            if self._using_schedv2:
                schedv2 = Schedv2(experiment)
                experiment.set_schedv2(schedv2)
            if CacheConditions.FALSE in experiment.cache_conditions:
                self._ClearCacheEntries(experiment)
            status = ExperimentStatus(experiment)
            experiment.Run()
            last_status_time = 0
            last_status_string = ""
            border = "=============================="
            try:
                if experiment.log_level != "verbose":
                    self.l.LogStartDots()
                while not experiment.IsComplete():
                    now = time.time()
                    if last_status_time + self.STATUS_TIME_DELAY < now:
                        last_status_time = time.time()
                        if experiment.log_level == "verbose":
                            self.l.LogOutput(border)
                            self.l.LogOutput(status.GetProgressString())
                            self.l.LogOutput(status.GetStatusString())
                            self.l.LogOutput(border)
                        else:
                            # Non-verbose: print the status only when it
                            # changed, otherwise just append a progress dot.
                            current_status_string = status.GetStatusString()
                            if current_status_string != last_status_string:
                                self.l.LogEndDots()
                                self.l.LogOutput(border)
                                self.l.LogOutput(current_status_string)
                                self.l.LogOutput(border)
                                last_status_string = current_status_string
                            else:
                                self.l.LogAppendDot()
                    time.sleep(self.THREAD_MONITOR_DELAY)
            except KeyboardInterrupt:
                self._terminated = True
                self.l.LogError("Ctrl-c pressed. Cleaning up...")
                experiment.Terminate()
                raise
            except SystemExit:
                self._terminated = True
                self.l.LogError("Unexpected exit. Cleaning up...")
                experiment.Terminate()
                raise
        finally:
            if not experiment.locks_dir:
                self._UnlockAllMachines(experiment)

    def _PrintTable(self, experiment):
        """Log the text results report of the experiment."""
        self.l.LogOutput(TextResultsReport(experiment).GetReport())

    def _BuildMessageBody(self, experiment):
        """Return the HTML-wrapped text report used as e-mail message body.

        Args:
          experiment: The experiment to report on.

        Returns:
          The text report followed by the results location, wrapped in a
          <pre> element.
        """
        text_report = TextResultsReport(experiment, True).GetReport()
        text_report += ("\nResults are stored in %s.\n" %
                        experiment.results_directory)
        return "<pre style='font-size: 13px'>%s</pre>" % text_report

    def _Email(self, experiment):
        """E-mail the results report.

        No mail is sent when every benchmark run was a cache hit and no
        explicit recipients were given, or when the "no_email" config
        option is set.  The current user is always added as a recipient.

        Args:
          experiment: The experiment to report on.
        """
        # Only email by default if a new run was completed.
        send_mail = any(not benchmark_run.cache_hit
                        for benchmark_run in experiment.benchmark_runs)
        if not send_mail and not experiment.email_to:
            return
        if config.GetConfig("no_email"):
            return

        label_names = [label.name for label in experiment.labels]
        subject = "%s: %s" % (experiment.name, " vs. ".join(label_names))

        text_report = self._BuildMessageBody(experiment)
        html_report = HTMLResultsReport(experiment).GetReport()
        attachment = EmailSender.Attachment("report.html", html_report)
        email_to = [getpass.getuser()] + experiment.email_to
        EmailSender().SendEmail(email_to,
                                subject,
                                text_report,
                                attachments=[attachment],
                                msg_type="html")

    def _StoreResults(self, experiment):
        """Write the experiment file, reports and run results to disk.

        The results directory is removed and recreated first.  Nothing is
        stored when the run was terminated.

        Args:
          experiment: The experiment whose results are stored.
        """
        if self._terminated:
            return
        results_directory = experiment.results_directory
        # NOTE(review): the original created a fresh FileUtils() for every
        # call; one instance is reused here on the assumption that the
        # wrapper holds no per-call state -- confirm against FileUtils.
        file_utils = FileUtils()
        file_utils.RmDir(results_directory)
        file_utils.MkDirP(results_directory)
        self.l.LogOutput("Storing experiment file in %s." % results_directory)
        experiment_file_path = os.path.join(results_directory,
                                            "experiment.exp")
        file_utils.WriteFile(experiment_file_path, experiment.experiment_file)

        self.l.LogOutput("Storing results report in %s." % results_directory)
        results_table_path = os.path.join(results_directory, "results.html")
        report = HTMLResultsReport(experiment).GetReport()
        if self.json_report:
            JSONResultsReport(experiment).GetReport(results_directory)
        file_utils.WriteFile(results_table_path, report)

        self.l.LogOutput("Storing email message body in %s." %
                         results_directory)
        msg_file_path = os.path.join(results_directory, "msg_body.html")
        msg_body = self._BuildMessageBody(experiment)
        file_utils.WriteFile(msg_file_path, msg_body)

        self.l.LogOutput("Storing results of each benchmark run.")
        for benchmark_run in experiment.benchmark_runs:
            if benchmark_run.result:
                # Keep only alphanumeric characters for the directory name.
                # "".join() yields a string on both Python 2 and Python 3,
                # whereas filter() on a string returns an iterator on
                # Python 3, which os.path.join() cannot handle.
                benchmark_run_name = "".join(
                    ch for ch in benchmark_run.name if ch.isalnum())
                benchmark_run_path = os.path.join(results_directory,
                                                  benchmark_run_name)
                benchmark_run.result.CopyResultsTo(benchmark_run_path)
                benchmark_run.result.CleanUp(
                    benchmark_run.benchmark.rm_chroot_tmp)

    def Run(self):
        """Run the experiment, then print, store and e-mail its results.

        The results table is printed even when the run failed or was
        interrupted; results are stored and e-mailed only when the run was
        not terminated.
        """
        try:
            self._Run(self._experiment)
        finally:
            # Always print the report at the end of the run.
            self._PrintTable(self._experiment)
            if not self._terminated:
                self._StoreResults(self._experiment)
                self._Email(self._experiment)
Ahmad Sharif | 0dcbc4b | 2012-02-02 16:37:18 -0800 | [diff] [blame] | 270 | |
| 271 | |
class MockExperimentRunner(ExperimentRunner):
    """Test double for ExperimentRunner.

    Every step only logs a message describing what would have been done.
    """

    def __init__(self, experiment, json_report):
        """Set up the runner with default scheduler, logger and executer."""
        super(MockExperimentRunner, self).__init__(experiment, json_report)

    def _Run(self, experiment):
        """Log the name of the experiment that would be run."""
        message = ("Would run the following experiment: '%s'." %
                   experiment.name)
        self.l.LogOutput(message)

    def _PrintTable(self, experiment):
        """Log that the results table would be printed."""
        message = "Would print the experiment table."
        self.l.LogOutput(message)

    def _Email(self, experiment):
        """Log that the result e-mail would be sent."""
        message = "Would send result email."
        self.l.LogOutput(message)

    def _StoreResults(self, experiment):
        """Log that the results would be stored."""
        message = "Would store the results."
        self.l.LogOutput(message)