Mike Frysinger | 0e2cb7a | 2019-08-20 17:04:52 -0400 | [diff] [blame] | 1 | #!/usr/bin/python2 |
Dan Shi | 691925f | 2014-06-23 10:30:16 -0700 | [diff] [blame] | 2 | |
| 3 | # The tool gathers the time used by special tasks/test jobs to a tab-separated |
| 4 | # output. |
| 5 | # Import the output to google spreadsheet can generate a sheet like this: |
| 6 | # https://docs.google.com/a/google.com/spreadsheets/d/ |
| 7 | # 1iLPSRAgSVz9rGVusTb2yaHJ88iv0QY3hZI_ZI-RdatI/edit#gid=51630294 |
| 8 | |
| 9 | |
| 10 | import os |
| 11 | import argparse |
Dan Shi | 691925f | 2014-06-23 10:30:16 -0700 | [diff] [blame] | 12 | from datetime import datetime |
Dan Shi | 691925f | 2014-06-23 10:30:16 -0700 | [diff] [blame] | 13 | import re |
Dan Shi | 322f4d1 | 2015-10-01 11:08:54 -0700 | [diff] [blame] | 14 | import subprocess |
Dan Shi | 691925f | 2014-06-23 10:30:16 -0700 | [diff] [blame] | 15 | import sys |
Dan Shi | 322f4d1 | 2015-10-01 11:08:54 -0700 | [diff] [blame] | 16 | import urllib2 |
Dan Shi | 691925f | 2014-06-23 10:30:16 -0700 | [diff] [blame] | 17 | |
| 18 | import common |
| 19 | from autotest_lib.client.common_lib import global_config |
Dan Shi | 691925f | 2014-06-23 10:30:16 -0700 | [diff] [blame] | 20 | from autotest_lib.frontend import setup_django_environment |
| 21 | from autotest_lib.frontend.afe import models |
| 22 | from autotest_lib.frontend.tko import models as tko_models |
| 23 | from autotest_lib.server.cros.dynamic_suite import job_status |
| 24 | |
| 25 | |
| 26 | CONFIG = global_config.global_config |
| 27 | AUTOTEST_SERVER = CONFIG.get_config_value('SERVER', 'hostname', type=str) |
| 28 | |
| 29 | LOG_BASE_URL = 'http://%s/tko/retrieve_logs.cgi?job=/results/' % AUTOTEST_SERVER |
| 30 | JOB_URL = LOG_BASE_URL + '%(job_id)s-%(owner)s/%(hostname)s' |
| 31 | LOG_PATH_FMT = 'hosts/%(hostname)s/%(task_id)d-%(taskname)s' |
| 32 | TASK_URL = LOG_BASE_URL + LOG_PATH_FMT |
| 33 | AUTOSERV_DEBUG_LOG = 'debug/autoserv.DEBUG' |
Dan Shi | 322f4d1 | 2015-10-01 11:08:54 -0700 | [diff] [blame] | 34 | HYPERLINK = '=HYPERLINK("%s","%0.1f")' |
| 35 | |
| 36 | GS_URI = 'gs://chromeos-autotest-results' |
Dan Shi | 691925f | 2014-06-23 10:30:16 -0700 | [diff] [blame] | 37 | |
| 38 | # A cache of special tasks, hostname:[task] |
| 39 | tasks_cache = {} |
| 40 | |
Dan Shi | 322f4d1 | 2015-10-01 11:08:54 -0700 | [diff] [blame] | 41 | def get_debug_log(autoserv_log_url, autoserv_log_path): |
| 42 | """Get a list of strings or a stream for autoserv.DEBUG log file. |
| 43 | |
| 44 | @param autoserv_log_url: Url to the repair job's autoserv.DEBUG log. |
| 45 | @param autoserv_log_path: Path to autoserv.DEBUG log, e.g., |
| 46 | hosts/hostname/job_id-repair/debug/autoserv.DEBUG. |
| 47 | @return: A list of string if debug log was moved to GS already, or a |
| 48 | stream of the autoserv.DEBUG file. |
| 49 | """ |
| 50 | url = urllib2.urlopen(autoserv_log_url).geturl() |
| 51 | if not 'accounts.google.com' in url: |
| 52 | return urllib2.urlopen(url) |
| 53 | else: |
| 54 | # The file was moved to Google storage already, read the file from GS. |
| 55 | debug_log_link = '%s/%s' % (GS_URI, autoserv_log_path) |
| 56 | cmd = 'gsutil cat %s' % debug_log_link |
| 57 | proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, |
| 58 | stderr=subprocess.PIPE) |
| 59 | stdout, stderr = proc.communicate() |
| 60 | if proc.returncode == 0: |
| 61 | return stdout.split(os.linesep) |
| 62 | else: |
| 63 | print 'Failed to read %s: %s' % (debug_log_link, stderr) |
| 64 | |
| 65 | |
Dan Shi | 691925f | 2014-06-23 10:30:16 -0700 | [diff] [blame] | 66 | class task_runtime(object): |
| 67 | """Details about the task run time. |
| 68 | """ |
| 69 | def __init__(self, task): |
| 70 | """Init task_runtime |
| 71 | """ |
| 72 | # Special task ID |
| 73 | self.id = task.id |
| 74 | # AFE special_task model |
| 75 | self.task = task |
| 76 | self.hostname = task.host.hostname |
| 77 | |
| 78 | # Link to log |
| 79 | task_info = {'task_id': task.id, 'taskname': task.task.lower(), |
| 80 | 'hostname': self.hostname} |
| 81 | self.log = TASK_URL % task_info |
| 82 | |
| 83 | autoserv_log_url = '%s/%s' % (self.log, AUTOSERV_DEBUG_LOG) |
| 84 | log_path = LOG_PATH_FMT % task_info |
| 85 | autoserv_log_path = '%s/%s' % (log_path, AUTOSERV_DEBUG_LOG) |
Dan Shi | 322f4d1 | 2015-10-01 11:08:54 -0700 | [diff] [blame] | 86 | debug_log = get_debug_log(autoserv_log_url, autoserv_log_path) |
Dan Shi | 691925f | 2014-06-23 10:30:16 -0700 | [diff] [blame] | 87 | lines = [line for line in debug_log if line] |
| 88 | # Task start time |
| 89 | self.start_time = _get_timestamp(lines[0]) |
| 90 | # Task end time |
| 91 | self.end_time = _get_timestamp(lines[-1]) |
| 92 | # Run time across end of a year. |
| 93 | if self.end_time < self.start_time: |
| 94 | self.end_time = self.end_time.replace(year=self.end_time.year+1) |
| 95 | self.time_used = self.end_time - self.start_time |
| 96 | |
| 97 | # Start/end time from afe_special_tasks table. |
| 98 | # To support old special tasks, the value falls back to the ones from |
| 99 | # debug log. |
| 100 | self.start_time_db = (self.task.time_started if self.task.time_started |
| 101 | else self.start_time) |
| 102 | self.end_time_db = (self.task.time_finished if self.task.time_finished |
| 103 | else self.end_time) |
| 104 | self.time_used_db = self.end_time_db - self.start_time_db |
| 105 | |
| 106 | |
| 107 | class job_runtime(object): |
| 108 | """Details about the job run time, including time spent on special tasks. |
| 109 | """ |
| 110 | def __init__(self, job, suite_start_time=None, suite_end_time=None): |
| 111 | """Init job_run_time |
| 112 | """ |
| 113 | # AFE job ID |
| 114 | self.id = job.id |
| 115 | # AFE job model |
| 116 | self.job = job |
| 117 | # Name of the job, strip all build and suite info. |
| 118 | self.name = job.name.split('/')[-1] |
| 119 | |
| 120 | try: |
| 121 | self.tko_job = tko_models.Job.objects.filter(afe_job_id=self.id)[0] |
| 122 | self.host_id = self.tko_job.machine_id |
| 123 | self.hostname = self.tko_job.machine.hostname |
| 124 | # Job start time |
| 125 | self.start_time = self.tko_job.started_time |
| 126 | # Job end time |
| 127 | self.end_time = self.tko_job.finished_time |
| 128 | self.time_used = self.end_time - self.start_time |
| 129 | except IndexError: |
| 130 | self.tko_job = None |
| 131 | self.host_id = 0 |
| 132 | self.time_used = 0 |
| 133 | |
| 134 | # Link to log |
| 135 | self.log = JOB_URL % {'job_id': job.id, 'owner': job.owner, |
| 136 | 'hostname': self.hostname} |
| 137 | |
| 138 | # Special tasks run before job starts. |
| 139 | self.tasks_before = [] |
| 140 | # Special tasks run after job finished. |
| 141 | self.tasks_after = [] |
| 142 | |
| 143 | # If job time used is not calculated, skip locating special tasks. |
| 144 | if not self.time_used: |
| 145 | return |
| 146 | |
| 147 | # suite job has no special tasks |
| 148 | if not self.hostname: |
| 149 | return |
| 150 | |
| 151 | tasks = tasks_cache.get(self.hostname, None) |
| 152 | if not tasks: |
| 153 | tasks = models.SpecialTask.objects.filter( |
| 154 | host__hostname=self.hostname, |
| 155 | time_started__gte=suite_start_time, |
| 156 | time_started__lte=suite_end_time) |
| 157 | tasks_cache[self.hostname] = tasks |
| 158 | for task in tasks: |
| 159 | if not task.queue_entry or task.queue_entry.job_id != self.id: |
| 160 | continue |
| 161 | t_runtime = task_runtime(task) |
| 162 | if task.time_started < self.start_time: |
| 163 | self.tasks_before.append(t_runtime) |
| 164 | else: |
| 165 | self.tasks_after.append(t_runtime) |
| 166 | |
| 167 | |
| 168 | def get_all_tasks(self): |
| 169 | return self.tasks_before + self.tasks_after |
| 170 | |
| 171 | |
| 172 | def get_first_task_start_time(self): |
| 173 | """Get the start time of first task, return test job start time if |
| 174 | there is no tasks_before |
| 175 | """ |
| 176 | start = self.start_time |
| 177 | for task in self.tasks_before: |
| 178 | if task.start_time_db < start: |
| 179 | start = task.start_time_db |
| 180 | return start |
| 181 | |
| 182 | |
| 183 | def get_last_task_end_time(self): |
| 184 | """Get the end time of last task, return test job end time if there is |
| 185 | no tasks_after. |
| 186 | """ |
| 187 | end = self.end_time |
| 188 | for task in self.tasks_after: |
| 189 | if end < task.end_time_db: |
| 190 | end = task.end_time_db |
| 191 | return end |
| 192 | |
| 193 | |
| 194 | def get_total_time(self): |
Dan Shi | 322f4d1 | 2015-10-01 11:08:54 -0700 | [diff] [blame] | 195 | return (self.get_last_task_end_time() - |
| 196 | self.get_first_task_start_time()).total_seconds() |
Dan Shi | 691925f | 2014-06-23 10:30:16 -0700 | [diff] [blame] | 197 | |
| 198 | |
| 199 | def get_time_on_tasks(self): |
Dan Shi | 322f4d1 | 2015-10-01 11:08:54 -0700 | [diff] [blame] | 200 | time = 0 |
| 201 | for task in self.tasks_before + self.tasks_after: |
| 202 | time += task.time_used.total_seconds() |
| 203 | return time |
Dan Shi | 691925f | 2014-06-23 10:30:16 -0700 | [diff] [blame] | 204 | |
| 205 | |
| 206 | def get_time_on_tasks_from_db(self): |
Dan Shi | 322f4d1 | 2015-10-01 11:08:54 -0700 | [diff] [blame] | 207 | time = 0 |
| 208 | for task in self.tasks_before + self.tasks_after: |
| 209 | time += task.time_used_db.total_seconds() |
| 210 | return time |
Dan Shi | 691925f | 2014-06-23 10:30:16 -0700 | [diff] [blame] | 211 | |
| 212 | |
| 213 | def get_time_on_wait(self): |
Dan Shi | 322f4d1 | 2015-10-01 11:08:54 -0700 | [diff] [blame] | 214 | return (self.get_total_time() - |
| 215 | self.get_time_on_tasks() - |
| 216 | self.time_used.total_seconds()) |
Dan Shi | 691925f | 2014-06-23 10:30:16 -0700 | [diff] [blame] | 217 | |
| 218 | |
| 219 | def get_time_on_wait_from_db(self): |
Dan Shi | 322f4d1 | 2015-10-01 11:08:54 -0700 | [diff] [blame] | 220 | return (self.get_total_time() - |
| 221 | self.get_time_on_tasks_from_db() - |
| 222 | self.time_used.total_seconds()) |
Dan Shi | 691925f | 2014-06-23 10:30:16 -0700 | [diff] [blame] | 223 | |
| 224 | |
| 225 | def get_time_per_task_type(self, task_type): |
| 226 | """only used for suite job |
| 227 | """ |
| 228 | tasks = [] |
| 229 | for job in self.test_jobs: |
| 230 | tasks += [task for task in job.get_all_tasks() |
| 231 | if task.task.task == task_type] |
| 232 | if not tasks: |
| 233 | return None |
| 234 | |
| 235 | task_min = tasks[0] |
| 236 | task_max = tasks[0] |
| 237 | total = 0 |
| 238 | for task in tasks: |
| 239 | if task.time_used < task_min.time_used: |
| 240 | task_min = task |
| 241 | if task.time_used > task_max.time_used: |
| 242 | task_max = task |
| 243 | total += task.time_used.total_seconds() |
| 244 | return (task_min, task_max, total/len(tasks), len(tasks)) |
| 245 | |
| 246 | |
| 247 | def get_time_per_task_type_from_db(self, task_type): |
| 248 | """only used for suite job |
| 249 | """ |
| 250 | tasks = [] |
| 251 | for job in self.test_jobs: |
| 252 | tasks += [task for task in job.get_all_tasks() |
| 253 | if task.task.task == task_type] |
| 254 | if not tasks: |
| 255 | return None |
| 256 | |
| 257 | task_min = tasks[0] |
| 258 | task_max = tasks[0] |
| 259 | total = 0 |
| 260 | for task in tasks: |
| 261 | if task.time_used_db < task_min.time_used_db: |
| 262 | task_min = task |
| 263 | if task.time_used_db > task_max.time_used_db: |
| 264 | task_max = task |
| 265 | total += task.time_used_db.total_seconds() |
| 266 | return (task_min, task_max, total/len(tasks), len(tasks)) |
| 267 | |
| 268 | |
| 269 | def _get_timestamp(line): |
| 270 | """Get the time from the beginning of a log entry. |
| 271 | |
| 272 | @param line: A line of log entry, e.g., "06/19 19:56:53.602 INFO |" |
| 273 | @return: A datetime value of the log entry. |
| 274 | """ |
| 275 | try: |
| 276 | time = re.match( '^(\d\d\/\d\d \d\d:\d\d:\d\d.\d+).*', line).group(1) |
| 277 | time = '%d/%s' % (datetime.now().year, time) |
| 278 | return datetime.strptime(time, '%Y/%m/%d %H:%M:%S.%f') |
| 279 | except: |
| 280 | return None |
| 281 | |
| 282 | |
| 283 | def _parse_args(args): |
| 284 | if not args: |
| 285 | print 'Try ./contrib/compare_suite.py --jobs 51,52,53' |
| 286 | sys.exit(0) |
| 287 | parser = argparse.ArgumentParser( |
| 288 | description=('A script to compare the performance of multiple suite' |
| 289 | ' runs.')) |
| 290 | parser.add_argument('--jobs', |
| 291 | help='A list of job IDs.') |
| 292 | return parser.parse_args(args) |
| 293 | |
| 294 | |
| 295 | def merge_time_dict(time_dict_list): |
| 296 | merged = {} |
| 297 | for time_dict in time_dict_list: |
| 298 | for key,val in time_dict.items(): |
| 299 | time_used = merged.get(key, 0) |
| 300 | merged[key] = time_used + val |
| 301 | return merged |
| 302 | |
| 303 | |
| 304 | def print_task_data(all_jobs, time_data): |
| 305 | percent_string = delimiter.join( |
| 306 | [str(100.0*data[2]*data[3]/suite_job.total_time) |
| 307 | if data else '_' for (data, suite_job) in |
| 308 | zip(time_data, all_jobs.keys())]) |
| 309 | print '%% on %s %s%s' % (task_type, delimiter, percent_string) |
| 310 | min_string = delimiter.join( |
| 311 | [(HYPERLINK % (data[0].log, data[0].time_used.total_seconds())) |
| 312 | if data else '_' for data in time_data]) |
| 313 | print ' %s min (seconds)%s%s' % (task_type, delimiter, min_string) |
| 314 | max_string = delimiter.join( |
| 315 | [HYPERLINK % (data[1].log, data[1].time_used.total_seconds()) |
| 316 | if data else '_' for data in time_data]) |
| 317 | print ' %s max (seconds)%s%s' % (task_type, delimiter, max_string) |
| 318 | ave_string = delimiter.join( |
| 319 | [str(data[2]) if data else '_' for data in time_data]) |
| 320 | print ' %s average (seconds)%s%s' % (task_type, delimiter, ave_string) |
| 321 | |
| 322 | |
| 323 | if __name__ == '__main__': |
| 324 | args = _parse_args(sys.argv[1:]) |
| 325 | print 'Comparing jobs: %s' % args.jobs |
| 326 | job_ids = [int(id) for id in args.jobs.split(',')] |
| 327 | |
| 328 | # Make sure all jobs are suite jobs |
| 329 | parent_jobs = [job.parent_job_id for job in |
| 330 | models.Job.objects.filter(id__in=job_ids)] |
| 331 | if any(parent_jobs): |
| 332 | print ('Some jobs are not suite job. Please provide a list of suite job' |
| 333 | ' IDs.') |
| 334 | sys.exit(1) |
| 335 | |
| 336 | # A dictionary of suite_job_runtime:{test_job_name:test_job_runtime} |
| 337 | all_jobs = {} |
| 338 | for job_id in job_ids: |
| 339 | suite_job = models.Job.objects.filter(id=job_id)[0] |
| 340 | suite_job_runtime = job_runtime(suite_job) |
| 341 | test_jobs = models.Job.objects.filter(parent_job_id=job_id) |
| 342 | if len(test_jobs) == 0: |
| 343 | print 'No child job found for suite job: %d' % job_id |
| 344 | sys.exit(1) |
| 345 | test_job_runtimes = [job_runtime(job, suite_job_runtime.start_time, |
| 346 | suite_job_runtime.end_time) |
| 347 | for job in test_jobs] |
| 348 | suite_job_runtime.test_jobs = test_job_runtimes |
| 349 | suite_job_runtime.hosts = set([job.host_id |
| 350 | for job in test_job_runtimes |
| 351 | if job.host_id != 0]) |
| 352 | suite_job_runtime.total_time = sum( |
| 353 | [job.get_total_time() for job in test_job_runtimes]) |
| 354 | suite_job_runtime.total_time_on_tasks = sum( |
| 355 | [job.get_time_on_tasks() for job in test_job_runtimes]) |
| 356 | suite_job_runtime.total_time_on_tasks_from_db = sum( |
| 357 | [job.get_time_on_tasks_from_db() for job in test_job_runtimes]) |
| 358 | suite_job_runtime.total_time_on_wait = sum( |
| 359 | [job.get_time_on_wait() for job in test_job_runtimes]) |
| 360 | suite_job_runtime.total_time_on_wait_from_db = sum( |
| 361 | [job.get_time_on_wait_from_db() for job in test_job_runtimes]) |
| 362 | suite_job_runtime.percent_on_tasks = 100*( |
| 363 | suite_job_runtime.total_time_on_tasks / |
| 364 | suite_job_runtime.total_time) |
| 365 | suite_job_runtime.percent_on_wait = 100*( |
| 366 | suite_job_runtime.total_time_on_wait / |
| 367 | suite_job_runtime.total_time) |
| 368 | suite_job_runtime.percent_on_tasks_from_db = 100*( |
| 369 | suite_job_runtime.total_time_on_tasks_from_db / |
| 370 | suite_job_runtime.total_time) |
| 371 | suite_job_runtime.percent_on_wait_from_db = 100*( |
| 372 | suite_job_runtime.total_time_on_wait_from_db / |
| 373 | suite_job_runtime.total_time) |
| 374 | all_jobs[suite_job_runtime] = {r.name:r for r in test_job_runtimes} |
| 375 | |
| 376 | delimiter = '\t' |
| 377 | # Print a row of suite job IDs. |
| 378 | print ('job ID%s' % delimiter + |
| 379 | delimiter.join([str(suite_job.id) |
| 380 | for suite_job in all_jobs.keys()])) |
| 381 | |
| 382 | # Print a row of platforms, e.g., lumpy-release. |
| 383 | print ('platform%s' % delimiter + |
| 384 | delimiter.join([suite_job.job.name.split('/')[0] |
| 385 | for suite_job in all_jobs.keys()])) |
| 386 | |
| 387 | # Print time to run suite |
| 388 | print ('time(mins)%s' % delimiter + |
| 389 | delimiter.join([str(suite_job.time_used.total_seconds()/60) |
| 390 | for suite_job in all_jobs.keys()])) |
| 391 | |
| 392 | # Print percent of time on tasks |
| 393 | print ('%% on special tasks%s' % delimiter + |
| 394 | delimiter.join([str(suite_job.percent_on_tasks) |
| 395 | for suite_job in all_jobs.keys()])) |
| 396 | |
| 397 | # Print percent of time on tasks based on time data from afe_special_tasks |
| 398 | print ('%% on special tasks with data from DB%s' % delimiter + |
| 399 | delimiter.join([str(suite_job.percent_on_tasks_from_db) |
| 400 | for suite_job in all_jobs.keys()])) |
| 401 | |
| 402 | # Print percent of time on tasks per task type |
| 403 | all_task_types = set() |
| 404 | for suite_job in all_jobs.keys(): |
| 405 | for job in suite_job.test_jobs: |
| 406 | all_task_types.update( |
| 407 | [task.task.task for task in job.get_all_tasks()]) |
| 408 | for task_type in all_task_types: |
| 409 | print 'Time data retrieved from debug log.' |
| 410 | time_data = [suite_job.get_time_per_task_type(task_type) |
| 411 | for suite_job in all_jobs.keys()] |
| 412 | print_task_data(all_jobs, time_data) |
| 413 | |
| 414 | print 'Time data retrieved from database' |
| 415 | time_data = [suite_job.get_time_per_task_type_from_db(task_type) |
| 416 | for suite_job in all_jobs.keys()] |
| 417 | print_task_data(all_jobs, time_data) |
| 418 | |
| 419 | count_string = delimiter.join( |
| 420 | [str(data[3]) if data else '_' for data in time_data]) |
| 421 | print ' %s count%s%s' % (task_type, delimiter, count_string) |
| 422 | |
| 423 | # Print percent of time on wait |
| 424 | print ('%% on wait%s' % delimiter + |
| 425 | delimiter.join([str(suite_job.percent_on_wait) |
| 426 | for suite_job in all_jobs.keys()])) |
| 427 | |
| 428 | # Print percent of time on wait based on special task time data from |
| 429 | # database |
| 430 | print ('%% on wait based on time data from db%s' % delimiter + |
| 431 | delimiter.join([str(suite_job.percent_on_wait_from_db) |
| 432 | for suite_job in all_jobs.keys()])) |
| 433 | |
| 434 | # Print the number of duts used for suite. |
| 435 | print ('dut #%s' % delimiter + |
| 436 | delimiter.join([str(len(suite_job.hosts)) |
| 437 | for suite_job in all_jobs.keys()])) |
| 438 | |
| 439 | for test_name in [job for job in all_jobs.values()[0].keys()]: |
| 440 | line = '%s%s' % (test_name, delimiter) |
| 441 | for suite_job in all_jobs.keys(): |
| 442 | test_job = all_jobs[suite_job].get(test_name) |
| 443 | if test_job: |
| 444 | line += (HYPERLINK % |
| 445 | (test_job.log, test_job.time_used.total_seconds()) |
| 446 | + delimiter) |
| 447 | else: |
| 448 | line += '_%s' % delimiter |
| 449 | print line |