Dan Shi | 691925f | 2014-06-23 10:30:16 -0700 | [diff] [blame] | 1 | #! /usr/bin/python |
| 2 | |
| 3 | # The tool gathers the time used by special tasks/test jobs to a tab-separated |
| 4 | # output. |
| 5 | # Import the output to google spreadsheet can generate a sheet like this: |
| 6 | # https://docs.google.com/a/google.com/spreadsheets/d/ |
| 7 | # 1iLPSRAgSVz9rGVusTb2yaHJ88iv0QY3hZI_ZI-RdatI/edit#gid=51630294 |
| 8 | |
| 9 | |
| 10 | import os |
| 11 | import argparse |
| 12 | import datetime as datetime_base |
| 13 | from datetime import datetime |
| 14 | import logging |
| 15 | import re |
| 16 | import sys |
| 17 | |
| 18 | import common |
| 19 | from autotest_lib.client.common_lib import global_config |
| 20 | #from autotest_lib.contrib import sheriff_servo |
| 21 | import sheriff_servo |
| 22 | from autotest_lib.frontend import setup_django_environment |
| 23 | from autotest_lib.frontend.afe import models |
| 24 | from autotest_lib.frontend.tko import models as tko_models |
| 25 | from autotest_lib.server.cros.dynamic_suite import job_status |
| 26 | |
| 27 | |
| 28 | CONFIG = global_config.global_config |
| 29 | AUTOTEST_SERVER = CONFIG.get_config_value('SERVER', 'hostname', type=str) |
| 30 | |
| 31 | LOG_BASE_URL = 'http://%s/tko/retrieve_logs.cgi?job=/results/' % AUTOTEST_SERVER |
| 32 | JOB_URL = LOG_BASE_URL + '%(job_id)s-%(owner)s/%(hostname)s' |
| 33 | LOG_PATH_FMT = 'hosts/%(hostname)s/%(task_id)d-%(taskname)s' |
| 34 | TASK_URL = LOG_BASE_URL + LOG_PATH_FMT |
| 35 | AUTOSERV_DEBUG_LOG = 'debug/autoserv.DEBUG' |
| 36 | HYPERLINK = '=HYPERLINK("""%s""","""%0.1f""")' |
| 37 | |
| 38 | # A cache of special tasks, hostname:[task] |
| 39 | tasks_cache = {} |
| 40 | |
| 41 | class task_runtime(object): |
| 42 | """Details about the task run time. |
| 43 | """ |
| 44 | def __init__(self, task): |
| 45 | """Init task_runtime |
| 46 | """ |
| 47 | # Special task ID |
| 48 | self.id = task.id |
| 49 | # AFE special_task model |
| 50 | self.task = task |
| 51 | self.hostname = task.host.hostname |
| 52 | |
| 53 | # Link to log |
| 54 | task_info = {'task_id': task.id, 'taskname': task.task.lower(), |
| 55 | 'hostname': self.hostname} |
| 56 | self.log = TASK_URL % task_info |
| 57 | |
| 58 | autoserv_log_url = '%s/%s' % (self.log, AUTOSERV_DEBUG_LOG) |
| 59 | log_path = LOG_PATH_FMT % task_info |
| 60 | autoserv_log_path = '%s/%s' % (log_path, AUTOSERV_DEBUG_LOG) |
| 61 | debug_log = sheriff_servo.Failure.get_debug_log(autoserv_log_url, |
| 62 | autoserv_log_path) |
| 63 | lines = [line for line in debug_log if line] |
| 64 | # Task start time |
| 65 | self.start_time = _get_timestamp(lines[0]) |
| 66 | # Task end time |
| 67 | self.end_time = _get_timestamp(lines[-1]) |
| 68 | # Run time across end of a year. |
| 69 | if self.end_time < self.start_time: |
| 70 | self.end_time = self.end_time.replace(year=self.end_time.year+1) |
| 71 | self.time_used = self.end_time - self.start_time |
| 72 | |
| 73 | # Start/end time from afe_special_tasks table. |
| 74 | # To support old special tasks, the value falls back to the ones from |
| 75 | # debug log. |
| 76 | self.start_time_db = (self.task.time_started if self.task.time_started |
| 77 | else self.start_time) |
| 78 | self.end_time_db = (self.task.time_finished if self.task.time_finished |
| 79 | else self.end_time) |
| 80 | self.time_used_db = self.end_time_db - self.start_time_db |
| 81 | |
| 82 | |
| 83 | class job_runtime(object): |
| 84 | """Details about the job run time, including time spent on special tasks. |
| 85 | """ |
| 86 | def __init__(self, job, suite_start_time=None, suite_end_time=None): |
| 87 | """Init job_run_time |
| 88 | """ |
| 89 | # AFE job ID |
| 90 | self.id = job.id |
| 91 | # AFE job model |
| 92 | self.job = job |
| 93 | # Name of the job, strip all build and suite info. |
| 94 | self.name = job.name.split('/')[-1] |
| 95 | |
| 96 | try: |
| 97 | self.tko_job = tko_models.Job.objects.filter(afe_job_id=self.id)[0] |
| 98 | self.host_id = self.tko_job.machine_id |
| 99 | self.hostname = self.tko_job.machine.hostname |
| 100 | # Job start time |
| 101 | self.start_time = self.tko_job.started_time |
| 102 | # Job end time |
| 103 | self.end_time = self.tko_job.finished_time |
| 104 | self.time_used = self.end_time - self.start_time |
| 105 | except IndexError: |
| 106 | self.tko_job = None |
| 107 | self.host_id = 0 |
| 108 | self.time_used = 0 |
| 109 | |
| 110 | # Link to log |
| 111 | self.log = JOB_URL % {'job_id': job.id, 'owner': job.owner, |
| 112 | 'hostname': self.hostname} |
| 113 | |
| 114 | # Special tasks run before job starts. |
| 115 | self.tasks_before = [] |
| 116 | # Special tasks run after job finished. |
| 117 | self.tasks_after = [] |
| 118 | |
| 119 | # If job time used is not calculated, skip locating special tasks. |
| 120 | if not self.time_used: |
| 121 | return |
| 122 | |
| 123 | # suite job has no special tasks |
| 124 | if not self.hostname: |
| 125 | return |
| 126 | |
| 127 | tasks = tasks_cache.get(self.hostname, None) |
| 128 | if not tasks: |
| 129 | tasks = models.SpecialTask.objects.filter( |
| 130 | host__hostname=self.hostname, |
| 131 | time_started__gte=suite_start_time, |
| 132 | time_started__lte=suite_end_time) |
| 133 | tasks_cache[self.hostname] = tasks |
| 134 | for task in tasks: |
| 135 | if not task.queue_entry or task.queue_entry.job_id != self.id: |
| 136 | continue |
| 137 | t_runtime = task_runtime(task) |
| 138 | if task.time_started < self.start_time: |
| 139 | self.tasks_before.append(t_runtime) |
| 140 | else: |
| 141 | self.tasks_after.append(t_runtime) |
| 142 | |
| 143 | |
| 144 | def get_all_tasks(self): |
| 145 | return self.tasks_before + self.tasks_after |
| 146 | |
| 147 | |
| 148 | def get_first_task_start_time(self): |
| 149 | """Get the start time of first task, return test job start time if |
| 150 | there is no tasks_before |
| 151 | """ |
| 152 | start = self.start_time |
| 153 | for task in self.tasks_before: |
| 154 | if task.start_time_db < start: |
| 155 | start = task.start_time_db |
| 156 | return start |
| 157 | |
| 158 | |
| 159 | def get_last_task_end_time(self): |
| 160 | """Get the end time of last task, return test job end time if there is |
| 161 | no tasks_after. |
| 162 | """ |
| 163 | end = self.end_time |
| 164 | for task in self.tasks_after: |
| 165 | if end < task.end_time_db: |
| 166 | end = task.end_time_db |
| 167 | return end |
| 168 | |
| 169 | |
| 170 | def get_total_time(self): |
| 171 | return (self.get_last_task_end_time() - |
| 172 | self.get_first_task_start_time()).total_seconds() |
| 173 | |
| 174 | |
| 175 | def get_time_on_tasks(self): |
| 176 | time = 0 |
| 177 | for task in self.tasks_before + self.tasks_after: |
| 178 | time += task.time_used.total_seconds() |
| 179 | return time |
| 180 | |
| 181 | |
| 182 | def get_time_on_tasks_from_db(self): |
| 183 | time = 0 |
| 184 | for task in self.tasks_before + self.tasks_after: |
| 185 | time += task.time_used_db.total_seconds() |
| 186 | return time |
| 187 | |
| 188 | |
| 189 | def get_time_on_wait(self): |
| 190 | return (self.get_total_time() - |
| 191 | self.get_time_on_tasks() - |
| 192 | self.time_used.total_seconds()) |
| 193 | |
| 194 | |
| 195 | def get_time_on_wait_from_db(self): |
| 196 | return (self.get_total_time() - |
| 197 | self.get_time_on_tasks_from_db() - |
| 198 | self.time_used.total_seconds()) |
| 199 | |
| 200 | |
| 201 | def get_time_per_task_type(self, task_type): |
| 202 | """only used for suite job |
| 203 | """ |
| 204 | tasks = [] |
| 205 | for job in self.test_jobs: |
| 206 | tasks += [task for task in job.get_all_tasks() |
| 207 | if task.task.task == task_type] |
| 208 | if not tasks: |
| 209 | return None |
| 210 | |
| 211 | task_min = tasks[0] |
| 212 | task_max = tasks[0] |
| 213 | total = 0 |
| 214 | for task in tasks: |
| 215 | if task.time_used < task_min.time_used: |
| 216 | task_min = task |
| 217 | if task.time_used > task_max.time_used: |
| 218 | task_max = task |
| 219 | total += task.time_used.total_seconds() |
| 220 | return (task_min, task_max, total/len(tasks), len(tasks)) |
| 221 | |
| 222 | |
| 223 | def get_time_per_task_type_from_db(self, task_type): |
| 224 | """only used for suite job |
| 225 | """ |
| 226 | tasks = [] |
| 227 | for job in self.test_jobs: |
| 228 | tasks += [task for task in job.get_all_tasks() |
| 229 | if task.task.task == task_type] |
| 230 | if not tasks: |
| 231 | return None |
| 232 | |
| 233 | task_min = tasks[0] |
| 234 | task_max = tasks[0] |
| 235 | total = 0 |
| 236 | for task in tasks: |
| 237 | if task.time_used_db < task_min.time_used_db: |
| 238 | task_min = task |
| 239 | if task.time_used_db > task_max.time_used_db: |
| 240 | task_max = task |
| 241 | total += task.time_used_db.total_seconds() |
| 242 | return (task_min, task_max, total/len(tasks), len(tasks)) |
| 243 | |
| 244 | |
| 245 | def _get_timestamp(line): |
| 246 | """Get the time from the beginning of a log entry. |
| 247 | |
| 248 | @param line: A line of log entry, e.g., "06/19 19:56:53.602 INFO |" |
| 249 | @return: A datetime value of the log entry. |
| 250 | """ |
| 251 | try: |
| 252 | time = re.match( '^(\d\d\/\d\d \d\d:\d\d:\d\d.\d+).*', line).group(1) |
| 253 | time = '%d/%s' % (datetime.now().year, time) |
| 254 | return datetime.strptime(time, '%Y/%m/%d %H:%M:%S.%f') |
| 255 | except: |
| 256 | return None |
| 257 | |
| 258 | |
| 259 | def _parse_args(args): |
| 260 | if not args: |
| 261 | print 'Try ./contrib/compare_suite.py --jobs 51,52,53' |
| 262 | sys.exit(0) |
| 263 | parser = argparse.ArgumentParser( |
| 264 | description=('A script to compare the performance of multiple suite' |
| 265 | ' runs.')) |
| 266 | parser.add_argument('--jobs', |
| 267 | help='A list of job IDs.') |
| 268 | return parser.parse_args(args) |
| 269 | |
| 270 | |
| 271 | def merge_time_dict(time_dict_list): |
| 272 | merged = {} |
| 273 | for time_dict in time_dict_list: |
| 274 | for key,val in time_dict.items(): |
| 275 | time_used = merged.get(key, 0) |
| 276 | merged[key] = time_used + val |
| 277 | return merged |
| 278 | |
| 279 | |
| 280 | def print_task_data(all_jobs, time_data): |
| 281 | percent_string = delimiter.join( |
| 282 | [str(100.0*data[2]*data[3]/suite_job.total_time) |
| 283 | if data else '_' for (data, suite_job) in |
| 284 | zip(time_data, all_jobs.keys())]) |
| 285 | print '%% on %s %s%s' % (task_type, delimiter, percent_string) |
| 286 | min_string = delimiter.join( |
| 287 | [(HYPERLINK % (data[0].log, data[0].time_used.total_seconds())) |
| 288 | if data else '_' for data in time_data]) |
| 289 | print ' %s min (seconds)%s%s' % (task_type, delimiter, min_string) |
| 290 | max_string = delimiter.join( |
| 291 | [HYPERLINK % (data[1].log, data[1].time_used.total_seconds()) |
| 292 | if data else '_' for data in time_data]) |
| 293 | print ' %s max (seconds)%s%s' % (task_type, delimiter, max_string) |
| 294 | ave_string = delimiter.join( |
| 295 | [str(data[2]) if data else '_' for data in time_data]) |
| 296 | print ' %s average (seconds)%s%s' % (task_type, delimiter, ave_string) |
| 297 | |
| 298 | |
| 299 | if __name__ == '__main__': |
| 300 | args = _parse_args(sys.argv[1:]) |
| 301 | print 'Comparing jobs: %s' % args.jobs |
| 302 | job_ids = [int(id) for id in args.jobs.split(',')] |
| 303 | |
| 304 | # Make sure all jobs are suite jobs |
| 305 | parent_jobs = [job.parent_job_id for job in |
| 306 | models.Job.objects.filter(id__in=job_ids)] |
| 307 | if any(parent_jobs): |
| 308 | print ('Some jobs are not suite job. Please provide a list of suite job' |
| 309 | ' IDs.') |
| 310 | sys.exit(1) |
| 311 | |
| 312 | # A dictionary of suite_job_runtime:{test_job_name:test_job_runtime} |
| 313 | all_jobs = {} |
| 314 | for job_id in job_ids: |
| 315 | suite_job = models.Job.objects.filter(id=job_id)[0] |
| 316 | suite_job_runtime = job_runtime(suite_job) |
| 317 | test_jobs = models.Job.objects.filter(parent_job_id=job_id) |
| 318 | if len(test_jobs) == 0: |
| 319 | print 'No child job found for suite job: %d' % job_id |
| 320 | sys.exit(1) |
| 321 | test_job_runtimes = [job_runtime(job, suite_job_runtime.start_time, |
| 322 | suite_job_runtime.end_time) |
| 323 | for job in test_jobs] |
| 324 | suite_job_runtime.test_jobs = test_job_runtimes |
| 325 | suite_job_runtime.hosts = set([job.host_id |
| 326 | for job in test_job_runtimes |
| 327 | if job.host_id != 0]) |
| 328 | suite_job_runtime.total_time = sum( |
| 329 | [job.get_total_time() for job in test_job_runtimes]) |
| 330 | suite_job_runtime.total_time_on_tasks = sum( |
| 331 | [job.get_time_on_tasks() for job in test_job_runtimes]) |
| 332 | suite_job_runtime.total_time_on_tasks_from_db = sum( |
| 333 | [job.get_time_on_tasks_from_db() for job in test_job_runtimes]) |
| 334 | suite_job_runtime.total_time_on_wait = sum( |
| 335 | [job.get_time_on_wait() for job in test_job_runtimes]) |
| 336 | suite_job_runtime.total_time_on_wait_from_db = sum( |
| 337 | [job.get_time_on_wait_from_db() for job in test_job_runtimes]) |
| 338 | suite_job_runtime.percent_on_tasks = 100*( |
| 339 | suite_job_runtime.total_time_on_tasks / |
| 340 | suite_job_runtime.total_time) |
| 341 | suite_job_runtime.percent_on_wait = 100*( |
| 342 | suite_job_runtime.total_time_on_wait / |
| 343 | suite_job_runtime.total_time) |
| 344 | suite_job_runtime.percent_on_tasks_from_db = 100*( |
| 345 | suite_job_runtime.total_time_on_tasks_from_db / |
| 346 | suite_job_runtime.total_time) |
| 347 | suite_job_runtime.percent_on_wait_from_db = 100*( |
| 348 | suite_job_runtime.total_time_on_wait_from_db / |
| 349 | suite_job_runtime.total_time) |
| 350 | all_jobs[suite_job_runtime] = {r.name:r for r in test_job_runtimes} |
| 351 | |
| 352 | delimiter = '\t' |
| 353 | # Print a row of suite job IDs. |
| 354 | print ('job ID%s' % delimiter + |
| 355 | delimiter.join([str(suite_job.id) |
| 356 | for suite_job in all_jobs.keys()])) |
| 357 | |
| 358 | # Print a row of platforms, e.g., lumpy-release. |
| 359 | print ('platform%s' % delimiter + |
| 360 | delimiter.join([suite_job.job.name.split('/')[0] |
| 361 | for suite_job in all_jobs.keys()])) |
| 362 | |
| 363 | # Print time to run suite |
| 364 | print ('time(mins)%s' % delimiter + |
| 365 | delimiter.join([str(suite_job.time_used.total_seconds()/60) |
| 366 | for suite_job in all_jobs.keys()])) |
| 367 | |
| 368 | # Print percent of time on tasks |
| 369 | print ('%% on special tasks%s' % delimiter + |
| 370 | delimiter.join([str(suite_job.percent_on_tasks) |
| 371 | for suite_job in all_jobs.keys()])) |
| 372 | |
| 373 | # Print percent of time on tasks based on time data from afe_special_tasks |
| 374 | print ('%% on special tasks with data from DB%s' % delimiter + |
| 375 | delimiter.join([str(suite_job.percent_on_tasks_from_db) |
| 376 | for suite_job in all_jobs.keys()])) |
| 377 | |
| 378 | # Print percent of time on tasks per task type |
| 379 | all_task_types = set() |
| 380 | for suite_job in all_jobs.keys(): |
| 381 | for job in suite_job.test_jobs: |
| 382 | all_task_types.update( |
| 383 | [task.task.task for task in job.get_all_tasks()]) |
| 384 | for task_type in all_task_types: |
| 385 | print 'Time data retrieved from debug log.' |
| 386 | time_data = [suite_job.get_time_per_task_type(task_type) |
| 387 | for suite_job in all_jobs.keys()] |
| 388 | print_task_data(all_jobs, time_data) |
| 389 | |
| 390 | print 'Time data retrieved from database' |
| 391 | time_data = [suite_job.get_time_per_task_type_from_db(task_type) |
| 392 | for suite_job in all_jobs.keys()] |
| 393 | print_task_data(all_jobs, time_data) |
| 394 | |
| 395 | count_string = delimiter.join( |
| 396 | [str(data[3]) if data else '_' for data in time_data]) |
| 397 | print ' %s count%s%s' % (task_type, delimiter, count_string) |
| 398 | |
| 399 | # Print percent of time on wait |
| 400 | print ('%% on wait%s' % delimiter + |
| 401 | delimiter.join([str(suite_job.percent_on_wait) |
| 402 | for suite_job in all_jobs.keys()])) |
| 403 | |
| 404 | # Print percent of time on wait based on special task time data from |
| 405 | # database |
| 406 | print ('%% on wait based on time data from db%s' % delimiter + |
| 407 | delimiter.join([str(suite_job.percent_on_wait_from_db) |
| 408 | for suite_job in all_jobs.keys()])) |
| 409 | |
| 410 | # Print the number of duts used for suite. |
| 411 | print ('dut #%s' % delimiter + |
| 412 | delimiter.join([str(len(suite_job.hosts)) |
| 413 | for suite_job in all_jobs.keys()])) |
| 414 | |
| 415 | for test_name in [job for job in all_jobs.values()[0].keys()]: |
| 416 | line = '%s%s' % (test_name, delimiter) |
| 417 | for suite_job in all_jobs.keys(): |
| 418 | test_job = all_jobs[suite_job].get(test_name) |
| 419 | if test_job: |
| 420 | line += (HYPERLINK % |
| 421 | (test_job.log, test_job.time_used.total_seconds()) |
| 422 | + delimiter) |
| 423 | else: |
| 424 | line += '_%s' % delimiter |
| 425 | print line |