blob: 16f309fcddcc99f85762f25cd68b399a2bb3c1fe [file] [log] [blame]
Dan Shi691925f2014-06-23 10:30:16 -07001#! /usr/bin/python
2
3# The tool gathers the time used by special tasks/test jobs to a tab-separated
4# output.
5# Import the output to google spreadsheet can generate a sheet like this:
6# https://docs.google.com/a/google.com/spreadsheets/d/
7# 1iLPSRAgSVz9rGVusTb2yaHJ88iv0QY3hZI_ZI-RdatI/edit#gid=51630294
8
9
10import os
11import argparse
Dan Shi691925f2014-06-23 10:30:16 -070012from datetime import datetime
Dan Shi691925f2014-06-23 10:30:16 -070013import re
Dan Shi322f4d12015-10-01 11:08:54 -070014import subprocess
Dan Shi691925f2014-06-23 10:30:16 -070015import sys
Dan Shi322f4d12015-10-01 11:08:54 -070016import urllib2
Dan Shi691925f2014-06-23 10:30:16 -070017
18import common
19from autotest_lib.client.common_lib import global_config
Dan Shi691925f2014-06-23 10:30:16 -070020from autotest_lib.frontend import setup_django_environment
21from autotest_lib.frontend.afe import models
22from autotest_lib.frontend.tko import models as tko_models
23from autotest_lib.server.cros.dynamic_suite import job_status
24
25
26CONFIG = global_config.global_config
27AUTOTEST_SERVER = CONFIG.get_config_value('SERVER', 'hostname', type=str)
28
29LOG_BASE_URL = 'http://%s/tko/retrieve_logs.cgi?job=/results/' % AUTOTEST_SERVER
30JOB_URL = LOG_BASE_URL + '%(job_id)s-%(owner)s/%(hostname)s'
31LOG_PATH_FMT = 'hosts/%(hostname)s/%(task_id)d-%(taskname)s'
32TASK_URL = LOG_BASE_URL + LOG_PATH_FMT
33AUTOSERV_DEBUG_LOG = 'debug/autoserv.DEBUG'
Dan Shi322f4d12015-10-01 11:08:54 -070034HYPERLINK = '=HYPERLINK("%s","%0.1f")'
35
36GS_URI = 'gs://chromeos-autotest-results'
Dan Shi691925f2014-06-23 10:30:16 -070037
38# A cache of special tasks, hostname:[task]
39tasks_cache = {}
40
Dan Shi322f4d12015-10-01 11:08:54 -070041def get_debug_log(autoserv_log_url, autoserv_log_path):
42 """Get a list of strings or a stream for autoserv.DEBUG log file.
43
44 @param autoserv_log_url: Url to the repair job's autoserv.DEBUG log.
45 @param autoserv_log_path: Path to autoserv.DEBUG log, e.g.,
46 hosts/hostname/job_id-repair/debug/autoserv.DEBUG.
47 @return: A list of string if debug log was moved to GS already, or a
48 stream of the autoserv.DEBUG file.
49 """
50 url = urllib2.urlopen(autoserv_log_url).geturl()
51 if not 'accounts.google.com' in url:
52 return urllib2.urlopen(url)
53 else:
54 # The file was moved to Google storage already, read the file from GS.
55 debug_log_link = '%s/%s' % (GS_URI, autoserv_log_path)
56 cmd = 'gsutil cat %s' % debug_log_link
57 proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE,
58 stderr=subprocess.PIPE)
59 stdout, stderr = proc.communicate()
60 if proc.returncode == 0:
61 return stdout.split(os.linesep)
62 else:
63 print 'Failed to read %s: %s' % (debug_log_link, stderr)
64
65
Dan Shi691925f2014-06-23 10:30:16 -070066class task_runtime(object):
67 """Details about the task run time.
68 """
69 def __init__(self, task):
70 """Init task_runtime
71 """
72 # Special task ID
73 self.id = task.id
74 # AFE special_task model
75 self.task = task
76 self.hostname = task.host.hostname
77
78 # Link to log
79 task_info = {'task_id': task.id, 'taskname': task.task.lower(),
80 'hostname': self.hostname}
81 self.log = TASK_URL % task_info
82
83 autoserv_log_url = '%s/%s' % (self.log, AUTOSERV_DEBUG_LOG)
84 log_path = LOG_PATH_FMT % task_info
85 autoserv_log_path = '%s/%s' % (log_path, AUTOSERV_DEBUG_LOG)
Dan Shi322f4d12015-10-01 11:08:54 -070086 debug_log = get_debug_log(autoserv_log_url, autoserv_log_path)
Dan Shi691925f2014-06-23 10:30:16 -070087 lines = [line for line in debug_log if line]
88 # Task start time
89 self.start_time = _get_timestamp(lines[0])
90 # Task end time
91 self.end_time = _get_timestamp(lines[-1])
92 # Run time across end of a year.
93 if self.end_time < self.start_time:
94 self.end_time = self.end_time.replace(year=self.end_time.year+1)
95 self.time_used = self.end_time - self.start_time
96
97 # Start/end time from afe_special_tasks table.
98 # To support old special tasks, the value falls back to the ones from
99 # debug log.
100 self.start_time_db = (self.task.time_started if self.task.time_started
101 else self.start_time)
102 self.end_time_db = (self.task.time_finished if self.task.time_finished
103 else self.end_time)
104 self.time_used_db = self.end_time_db - self.start_time_db
105
106
107class job_runtime(object):
108 """Details about the job run time, including time spent on special tasks.
109 """
110 def __init__(self, job, suite_start_time=None, suite_end_time=None):
111 """Init job_run_time
112 """
113 # AFE job ID
114 self.id = job.id
115 # AFE job model
116 self.job = job
117 # Name of the job, strip all build and suite info.
118 self.name = job.name.split('/')[-1]
119
120 try:
121 self.tko_job = tko_models.Job.objects.filter(afe_job_id=self.id)[0]
122 self.host_id = self.tko_job.machine_id
123 self.hostname = self.tko_job.machine.hostname
124 # Job start time
125 self.start_time = self.tko_job.started_time
126 # Job end time
127 self.end_time = self.tko_job.finished_time
128 self.time_used = self.end_time - self.start_time
129 except IndexError:
130 self.tko_job = None
131 self.host_id = 0
132 self.time_used = 0
133
134 # Link to log
135 self.log = JOB_URL % {'job_id': job.id, 'owner': job.owner,
136 'hostname': self.hostname}
137
138 # Special tasks run before job starts.
139 self.tasks_before = []
140 # Special tasks run after job finished.
141 self.tasks_after = []
142
143 # If job time used is not calculated, skip locating special tasks.
144 if not self.time_used:
145 return
146
147 # suite job has no special tasks
148 if not self.hostname:
149 return
150
151 tasks = tasks_cache.get(self.hostname, None)
152 if not tasks:
153 tasks = models.SpecialTask.objects.filter(
154 host__hostname=self.hostname,
155 time_started__gte=suite_start_time,
156 time_started__lte=suite_end_time)
157 tasks_cache[self.hostname] = tasks
158 for task in tasks:
159 if not task.queue_entry or task.queue_entry.job_id != self.id:
160 continue
161 t_runtime = task_runtime(task)
162 if task.time_started < self.start_time:
163 self.tasks_before.append(t_runtime)
164 else:
165 self.tasks_after.append(t_runtime)
166
167
168 def get_all_tasks(self):
169 return self.tasks_before + self.tasks_after
170
171
172 def get_first_task_start_time(self):
173 """Get the start time of first task, return test job start time if
174 there is no tasks_before
175 """
176 start = self.start_time
177 for task in self.tasks_before:
178 if task.start_time_db < start:
179 start = task.start_time_db
180 return start
181
182
183 def get_last_task_end_time(self):
184 """Get the end time of last task, return test job end time if there is
185 no tasks_after.
186 """
187 end = self.end_time
188 for task in self.tasks_after:
189 if end < task.end_time_db:
190 end = task.end_time_db
191 return end
192
193
194 def get_total_time(self):
Dan Shi322f4d12015-10-01 11:08:54 -0700195 return (self.get_last_task_end_time() -
196 self.get_first_task_start_time()).total_seconds()
Dan Shi691925f2014-06-23 10:30:16 -0700197
198
199 def get_time_on_tasks(self):
Dan Shi322f4d12015-10-01 11:08:54 -0700200 time = 0
201 for task in self.tasks_before + self.tasks_after:
202 time += task.time_used.total_seconds()
203 return time
Dan Shi691925f2014-06-23 10:30:16 -0700204
205
206 def get_time_on_tasks_from_db(self):
Dan Shi322f4d12015-10-01 11:08:54 -0700207 time = 0
208 for task in self.tasks_before + self.tasks_after:
209 time += task.time_used_db.total_seconds()
210 return time
Dan Shi691925f2014-06-23 10:30:16 -0700211
212
213 def get_time_on_wait(self):
Dan Shi322f4d12015-10-01 11:08:54 -0700214 return (self.get_total_time() -
215 self.get_time_on_tasks() -
216 self.time_used.total_seconds())
Dan Shi691925f2014-06-23 10:30:16 -0700217
218
219 def get_time_on_wait_from_db(self):
Dan Shi322f4d12015-10-01 11:08:54 -0700220 return (self.get_total_time() -
221 self.get_time_on_tasks_from_db() -
222 self.time_used.total_seconds())
Dan Shi691925f2014-06-23 10:30:16 -0700223
224
225 def get_time_per_task_type(self, task_type):
226 """only used for suite job
227 """
228 tasks = []
229 for job in self.test_jobs:
230 tasks += [task for task in job.get_all_tasks()
231 if task.task.task == task_type]
232 if not tasks:
233 return None
234
235 task_min = tasks[0]
236 task_max = tasks[0]
237 total = 0
238 for task in tasks:
239 if task.time_used < task_min.time_used:
240 task_min = task
241 if task.time_used > task_max.time_used:
242 task_max = task
243 total += task.time_used.total_seconds()
244 return (task_min, task_max, total/len(tasks), len(tasks))
245
246
247 def get_time_per_task_type_from_db(self, task_type):
248 """only used for suite job
249 """
250 tasks = []
251 for job in self.test_jobs:
252 tasks += [task for task in job.get_all_tasks()
253 if task.task.task == task_type]
254 if not tasks:
255 return None
256
257 task_min = tasks[0]
258 task_max = tasks[0]
259 total = 0
260 for task in tasks:
261 if task.time_used_db < task_min.time_used_db:
262 task_min = task
263 if task.time_used_db > task_max.time_used_db:
264 task_max = task
265 total += task.time_used_db.total_seconds()
266 return (task_min, task_max, total/len(tasks), len(tasks))
267
268
269def _get_timestamp(line):
270 """Get the time from the beginning of a log entry.
271
272 @param line: A line of log entry, e.g., "06/19 19:56:53.602 INFO |"
273 @return: A datetime value of the log entry.
274 """
275 try:
276 time = re.match( '^(\d\d\/\d\d \d\d:\d\d:\d\d.\d+).*', line).group(1)
277 time = '%d/%s' % (datetime.now().year, time)
278 return datetime.strptime(time, '%Y/%m/%d %H:%M:%S.%f')
279 except:
280 return None
281
282
283def _parse_args(args):
284 if not args:
285 print 'Try ./contrib/compare_suite.py --jobs 51,52,53'
286 sys.exit(0)
287 parser = argparse.ArgumentParser(
288 description=('A script to compare the performance of multiple suite'
289 ' runs.'))
290 parser.add_argument('--jobs',
291 help='A list of job IDs.')
292 return parser.parse_args(args)
293
294
295def merge_time_dict(time_dict_list):
296 merged = {}
297 for time_dict in time_dict_list:
298 for key,val in time_dict.items():
299 time_used = merged.get(key, 0)
300 merged[key] = time_used + val
301 return merged
302
303
304def print_task_data(all_jobs, time_data):
305 percent_string = delimiter.join(
306 [str(100.0*data[2]*data[3]/suite_job.total_time)
307 if data else '_' for (data, suite_job) in
308 zip(time_data, all_jobs.keys())])
309 print '%% on %s %s%s' % (task_type, delimiter, percent_string)
310 min_string = delimiter.join(
311 [(HYPERLINK % (data[0].log, data[0].time_used.total_seconds()))
312 if data else '_' for data in time_data])
313 print ' %s min (seconds)%s%s' % (task_type, delimiter, min_string)
314 max_string = delimiter.join(
315 [HYPERLINK % (data[1].log, data[1].time_used.total_seconds())
316 if data else '_' for data in time_data])
317 print ' %s max (seconds)%s%s' % (task_type, delimiter, max_string)
318 ave_string = delimiter.join(
319 [str(data[2]) if data else '_' for data in time_data])
320 print ' %s average (seconds)%s%s' % (task_type, delimiter, ave_string)
321
322
323if __name__ == '__main__':
324 args = _parse_args(sys.argv[1:])
325 print 'Comparing jobs: %s' % args.jobs
326 job_ids = [int(id) for id in args.jobs.split(',')]
327
328 # Make sure all jobs are suite jobs
329 parent_jobs = [job.parent_job_id for job in
330 models.Job.objects.filter(id__in=job_ids)]
331 if any(parent_jobs):
332 print ('Some jobs are not suite job. Please provide a list of suite job'
333 ' IDs.')
334 sys.exit(1)
335
336 # A dictionary of suite_job_runtime:{test_job_name:test_job_runtime}
337 all_jobs = {}
338 for job_id in job_ids:
339 suite_job = models.Job.objects.filter(id=job_id)[0]
340 suite_job_runtime = job_runtime(suite_job)
341 test_jobs = models.Job.objects.filter(parent_job_id=job_id)
342 if len(test_jobs) == 0:
343 print 'No child job found for suite job: %d' % job_id
344 sys.exit(1)
345 test_job_runtimes = [job_runtime(job, suite_job_runtime.start_time,
346 suite_job_runtime.end_time)
347 for job in test_jobs]
348 suite_job_runtime.test_jobs = test_job_runtimes
349 suite_job_runtime.hosts = set([job.host_id
350 for job in test_job_runtimes
351 if job.host_id != 0])
352 suite_job_runtime.total_time = sum(
353 [job.get_total_time() for job in test_job_runtimes])
354 suite_job_runtime.total_time_on_tasks = sum(
355 [job.get_time_on_tasks() for job in test_job_runtimes])
356 suite_job_runtime.total_time_on_tasks_from_db = sum(
357 [job.get_time_on_tasks_from_db() for job in test_job_runtimes])
358 suite_job_runtime.total_time_on_wait = sum(
359 [job.get_time_on_wait() for job in test_job_runtimes])
360 suite_job_runtime.total_time_on_wait_from_db = sum(
361 [job.get_time_on_wait_from_db() for job in test_job_runtimes])
362 suite_job_runtime.percent_on_tasks = 100*(
363 suite_job_runtime.total_time_on_tasks /
364 suite_job_runtime.total_time)
365 suite_job_runtime.percent_on_wait = 100*(
366 suite_job_runtime.total_time_on_wait /
367 suite_job_runtime.total_time)
368 suite_job_runtime.percent_on_tasks_from_db = 100*(
369 suite_job_runtime.total_time_on_tasks_from_db /
370 suite_job_runtime.total_time)
371 suite_job_runtime.percent_on_wait_from_db = 100*(
372 suite_job_runtime.total_time_on_wait_from_db /
373 suite_job_runtime.total_time)
374 all_jobs[suite_job_runtime] = {r.name:r for r in test_job_runtimes}
375
376 delimiter = '\t'
377 # Print a row of suite job IDs.
378 print ('job ID%s' % delimiter +
379 delimiter.join([str(suite_job.id)
380 for suite_job in all_jobs.keys()]))
381
382 # Print a row of platforms, e.g., lumpy-release.
383 print ('platform%s' % delimiter +
384 delimiter.join([suite_job.job.name.split('/')[0]
385 for suite_job in all_jobs.keys()]))
386
387 # Print time to run suite
388 print ('time(mins)%s' % delimiter +
389 delimiter.join([str(suite_job.time_used.total_seconds()/60)
390 for suite_job in all_jobs.keys()]))
391
392 # Print percent of time on tasks
393 print ('%% on special tasks%s' % delimiter +
394 delimiter.join([str(suite_job.percent_on_tasks)
395 for suite_job in all_jobs.keys()]))
396
397 # Print percent of time on tasks based on time data from afe_special_tasks
398 print ('%% on special tasks with data from DB%s' % delimiter +
399 delimiter.join([str(suite_job.percent_on_tasks_from_db)
400 for suite_job in all_jobs.keys()]))
401
402 # Print percent of time on tasks per task type
403 all_task_types = set()
404 for suite_job in all_jobs.keys():
405 for job in suite_job.test_jobs:
406 all_task_types.update(
407 [task.task.task for task in job.get_all_tasks()])
408 for task_type in all_task_types:
409 print 'Time data retrieved from debug log.'
410 time_data = [suite_job.get_time_per_task_type(task_type)
411 for suite_job in all_jobs.keys()]
412 print_task_data(all_jobs, time_data)
413
414 print 'Time data retrieved from database'
415 time_data = [suite_job.get_time_per_task_type_from_db(task_type)
416 for suite_job in all_jobs.keys()]
417 print_task_data(all_jobs, time_data)
418
419 count_string = delimiter.join(
420 [str(data[3]) if data else '_' for data in time_data])
421 print ' %s count%s%s' % (task_type, delimiter, count_string)
422
423 # Print percent of time on wait
424 print ('%% on wait%s' % delimiter +
425 delimiter.join([str(suite_job.percent_on_wait)
426 for suite_job in all_jobs.keys()]))
427
428 # Print percent of time on wait based on special task time data from
429 # database
430 print ('%% on wait based on time data from db%s' % delimiter +
431 delimiter.join([str(suite_job.percent_on_wait_from_db)
432 for suite_job in all_jobs.keys()]))
433
434 # Print the number of duts used for suite.
435 print ('dut #%s' % delimiter +
436 delimiter.join([str(len(suite_job.hosts))
437 for suite_job in all_jobs.keys()]))
438
439 for test_name in [job for job in all_jobs.values()[0].keys()]:
440 line = '%s%s' % (test_name, delimiter)
441 for suite_job in all_jobs.keys():
442 test_job = all_jobs[suite_job].get(test_name)
443 if test_job:
444 line += (HYPERLINK %
445 (test_job.log, test_job.time_used.total_seconds())
446 + delimiter)
447 else:
448 line += '_%s' % delimiter
449 print line