| #!/usr/bin/python2 |
| |
| # The tool gathers the time used by special tasks/test jobs to a tab-separated |
| # output. |
| # Import the output to google spreadsheet can generate a sheet like this: |
| # https://docs.google.com/a/google.com/spreadsheets/d/ |
| # 1iLPSRAgSVz9rGVusTb2yaHJ88iv0QY3hZI_ZI-RdatI/edit#gid=51630294 |
| |
| |
| import os |
| import argparse |
| from datetime import datetime |
| import re |
| import subprocess |
| import sys |
| import urllib2 |
| |
| import common |
| from autotest_lib.client.common_lib import global_config |
| from autotest_lib.frontend import setup_django_environment |
| from autotest_lib.frontend.afe import models |
| from autotest_lib.frontend.tko import models as tko_models |
| from autotest_lib.server.cros.dynamic_suite import job_status |
| |
| |
| CONFIG = global_config.global_config |
| AUTOTEST_SERVER = CONFIG.get_config_value('SERVER', 'hostname', type=str) |
| |
| LOG_BASE_URL = 'http://%s/tko/retrieve_logs.cgi?job=/results/' % AUTOTEST_SERVER |
| JOB_URL = LOG_BASE_URL + '%(job_id)s-%(owner)s/%(hostname)s' |
| LOG_PATH_FMT = 'hosts/%(hostname)s/%(task_id)d-%(taskname)s' |
| TASK_URL = LOG_BASE_URL + LOG_PATH_FMT |
| AUTOSERV_DEBUG_LOG = 'debug/autoserv.DEBUG' |
| HYPERLINK = '=HYPERLINK("%s","%0.1f")' |
| |
| GS_URI = 'gs://chromeos-autotest-results' |
| |
| # A cache of special tasks, hostname:[task] |
| tasks_cache = {} |
| |
| def get_debug_log(autoserv_log_url, autoserv_log_path): |
| """Get a list of strings or a stream for autoserv.DEBUG log file. |
| |
| @param autoserv_log_url: Url to the repair job's autoserv.DEBUG log. |
| @param autoserv_log_path: Path to autoserv.DEBUG log, e.g., |
| hosts/hostname/job_id-repair/debug/autoserv.DEBUG. |
| @return: A list of string if debug log was moved to GS already, or a |
| stream of the autoserv.DEBUG file. |
| """ |
| url = urllib2.urlopen(autoserv_log_url).geturl() |
| if not 'accounts.google.com' in url: |
| return urllib2.urlopen(url) |
| else: |
| # The file was moved to Google storage already, read the file from GS. |
| debug_log_link = '%s/%s' % (GS_URI, autoserv_log_path) |
| cmd = 'gsutil cat %s' % debug_log_link |
| proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, |
| stderr=subprocess.PIPE) |
| stdout, stderr = proc.communicate() |
| if proc.returncode == 0: |
| return stdout.split(os.linesep) |
| else: |
| print 'Failed to read %s: %s' % (debug_log_link, stderr) |
| |
| |
| class task_runtime(object): |
| """Details about the task run time. |
| """ |
| def __init__(self, task): |
| """Init task_runtime |
| """ |
| # Special task ID |
| self.id = task.id |
| # AFE special_task model |
| self.task = task |
| self.hostname = task.host.hostname |
| |
| # Link to log |
| task_info = {'task_id': task.id, 'taskname': task.task.lower(), |
| 'hostname': self.hostname} |
| self.log = TASK_URL % task_info |
| |
| autoserv_log_url = '%s/%s' % (self.log, AUTOSERV_DEBUG_LOG) |
| log_path = LOG_PATH_FMT % task_info |
| autoserv_log_path = '%s/%s' % (log_path, AUTOSERV_DEBUG_LOG) |
| debug_log = get_debug_log(autoserv_log_url, autoserv_log_path) |
| lines = [line for line in debug_log if line] |
| # Task start time |
| self.start_time = _get_timestamp(lines[0]) |
| # Task end time |
| self.end_time = _get_timestamp(lines[-1]) |
| # Run time across end of a year. |
| if self.end_time < self.start_time: |
| self.end_time = self.end_time.replace(year=self.end_time.year+1) |
| self.time_used = self.end_time - self.start_time |
| |
| # Start/end time from afe_special_tasks table. |
| # To support old special tasks, the value falls back to the ones from |
| # debug log. |
| self.start_time_db = (self.task.time_started if self.task.time_started |
| else self.start_time) |
| self.end_time_db = (self.task.time_finished if self.task.time_finished |
| else self.end_time) |
| self.time_used_db = self.end_time_db - self.start_time_db |
| |
| |
| class job_runtime(object): |
| """Details about the job run time, including time spent on special tasks. |
| """ |
| def __init__(self, job, suite_start_time=None, suite_end_time=None): |
| """Init job_run_time |
| """ |
| # AFE job ID |
| self.id = job.id |
| # AFE job model |
| self.job = job |
| # Name of the job, strip all build and suite info. |
| self.name = job.name.split('/')[-1] |
| |
| try: |
| self.tko_job = tko_models.Job.objects.filter(afe_job_id=self.id)[0] |
| self.host_id = self.tko_job.machine_id |
| self.hostname = self.tko_job.machine.hostname |
| # Job start time |
| self.start_time = self.tko_job.started_time |
| # Job end time |
| self.end_time = self.tko_job.finished_time |
| self.time_used = self.end_time - self.start_time |
| except IndexError: |
| self.tko_job = None |
| self.host_id = 0 |
| self.time_used = 0 |
| |
| # Link to log |
| self.log = JOB_URL % {'job_id': job.id, 'owner': job.owner, |
| 'hostname': self.hostname} |
| |
| # Special tasks run before job starts. |
| self.tasks_before = [] |
| # Special tasks run after job finished. |
| self.tasks_after = [] |
| |
| # If job time used is not calculated, skip locating special tasks. |
| if not self.time_used: |
| return |
| |
| # suite job has no special tasks |
| if not self.hostname: |
| return |
| |
| tasks = tasks_cache.get(self.hostname, None) |
| if not tasks: |
| tasks = models.SpecialTask.objects.filter( |
| host__hostname=self.hostname, |
| time_started__gte=suite_start_time, |
| time_started__lte=suite_end_time) |
| tasks_cache[self.hostname] = tasks |
| for task in tasks: |
| if not task.queue_entry or task.queue_entry.job_id != self.id: |
| continue |
| t_runtime = task_runtime(task) |
| if task.time_started < self.start_time: |
| self.tasks_before.append(t_runtime) |
| else: |
| self.tasks_after.append(t_runtime) |
| |
| |
| def get_all_tasks(self): |
| return self.tasks_before + self.tasks_after |
| |
| |
| def get_first_task_start_time(self): |
| """Get the start time of first task, return test job start time if |
| there is no tasks_before |
| """ |
| start = self.start_time |
| for task in self.tasks_before: |
| if task.start_time_db < start: |
| start = task.start_time_db |
| return start |
| |
| |
| def get_last_task_end_time(self): |
| """Get the end time of last task, return test job end time if there is |
| no tasks_after. |
| """ |
| end = self.end_time |
| for task in self.tasks_after: |
| if end < task.end_time_db: |
| end = task.end_time_db |
| return end |
| |
| |
| def get_total_time(self): |
| return (self.get_last_task_end_time() - |
| self.get_first_task_start_time()).total_seconds() |
| |
| |
| def get_time_on_tasks(self): |
| time = 0 |
| for task in self.tasks_before + self.tasks_after: |
| time += task.time_used.total_seconds() |
| return time |
| |
| |
| def get_time_on_tasks_from_db(self): |
| time = 0 |
| for task in self.tasks_before + self.tasks_after: |
| time += task.time_used_db.total_seconds() |
| return time |
| |
| |
| def get_time_on_wait(self): |
| return (self.get_total_time() - |
| self.get_time_on_tasks() - |
| self.time_used.total_seconds()) |
| |
| |
| def get_time_on_wait_from_db(self): |
| return (self.get_total_time() - |
| self.get_time_on_tasks_from_db() - |
| self.time_used.total_seconds()) |
| |
| |
| def get_time_per_task_type(self, task_type): |
| """only used for suite job |
| """ |
| tasks = [] |
| for job in self.test_jobs: |
| tasks += [task for task in job.get_all_tasks() |
| if task.task.task == task_type] |
| if not tasks: |
| return None |
| |
| task_min = tasks[0] |
| task_max = tasks[0] |
| total = 0 |
| for task in tasks: |
| if task.time_used < task_min.time_used: |
| task_min = task |
| if task.time_used > task_max.time_used: |
| task_max = task |
| total += task.time_used.total_seconds() |
| return (task_min, task_max, total/len(tasks), len(tasks)) |
| |
| |
| def get_time_per_task_type_from_db(self, task_type): |
| """only used for suite job |
| """ |
| tasks = [] |
| for job in self.test_jobs: |
| tasks += [task for task in job.get_all_tasks() |
| if task.task.task == task_type] |
| if not tasks: |
| return None |
| |
| task_min = tasks[0] |
| task_max = tasks[0] |
| total = 0 |
| for task in tasks: |
| if task.time_used_db < task_min.time_used_db: |
| task_min = task |
| if task.time_used_db > task_max.time_used_db: |
| task_max = task |
| total += task.time_used_db.total_seconds() |
| return (task_min, task_max, total/len(tasks), len(tasks)) |
| |
| |
| def _get_timestamp(line): |
| """Get the time from the beginning of a log entry. |
| |
| @param line: A line of log entry, e.g., "06/19 19:56:53.602 INFO |" |
| @return: A datetime value of the log entry. |
| """ |
| try: |
| time = re.match( '^(\d\d\/\d\d \d\d:\d\d:\d\d.\d+).*', line).group(1) |
| time = '%d/%s' % (datetime.now().year, time) |
| return datetime.strptime(time, '%Y/%m/%d %H:%M:%S.%f') |
| except: |
| return None |
| |
| |
| def _parse_args(args): |
| if not args: |
| print 'Try ./contrib/compare_suite.py --jobs 51,52,53' |
| sys.exit(0) |
| parser = argparse.ArgumentParser( |
| description=('A script to compare the performance of multiple suite' |
| ' runs.')) |
| parser.add_argument('--jobs', |
| help='A list of job IDs.') |
| return parser.parse_args(args) |
| |
| |
| def merge_time_dict(time_dict_list): |
| merged = {} |
| for time_dict in time_dict_list: |
| for key,val in time_dict.items(): |
| time_used = merged.get(key, 0) |
| merged[key] = time_used + val |
| return merged |
| |
| |
| def print_task_data(all_jobs, time_data): |
| percent_string = delimiter.join( |
| [str(100.0*data[2]*data[3]/suite_job.total_time) |
| if data else '_' for (data, suite_job) in |
| zip(time_data, all_jobs.keys())]) |
| print '%% on %s %s%s' % (task_type, delimiter, percent_string) |
| min_string = delimiter.join( |
| [(HYPERLINK % (data[0].log, data[0].time_used.total_seconds())) |
| if data else '_' for data in time_data]) |
| print ' %s min (seconds)%s%s' % (task_type, delimiter, min_string) |
| max_string = delimiter.join( |
| [HYPERLINK % (data[1].log, data[1].time_used.total_seconds()) |
| if data else '_' for data in time_data]) |
| print ' %s max (seconds)%s%s' % (task_type, delimiter, max_string) |
| ave_string = delimiter.join( |
| [str(data[2]) if data else '_' for data in time_data]) |
| print ' %s average (seconds)%s%s' % (task_type, delimiter, ave_string) |
| |
| |
| if __name__ == '__main__': |
| args = _parse_args(sys.argv[1:]) |
| print 'Comparing jobs: %s' % args.jobs |
| job_ids = [int(id) for id in args.jobs.split(',')] |
| |
| # Make sure all jobs are suite jobs |
| parent_jobs = [job.parent_job_id for job in |
| models.Job.objects.filter(id__in=job_ids)] |
| if any(parent_jobs): |
| print ('Some jobs are not suite job. Please provide a list of suite job' |
| ' IDs.') |
| sys.exit(1) |
| |
| # A dictionary of suite_job_runtime:{test_job_name:test_job_runtime} |
| all_jobs = {} |
| for job_id in job_ids: |
| suite_job = models.Job.objects.filter(id=job_id)[0] |
| suite_job_runtime = job_runtime(suite_job) |
| test_jobs = models.Job.objects.filter(parent_job_id=job_id) |
| if len(test_jobs) == 0: |
| print 'No child job found for suite job: %d' % job_id |
| sys.exit(1) |
| test_job_runtimes = [job_runtime(job, suite_job_runtime.start_time, |
| suite_job_runtime.end_time) |
| for job in test_jobs] |
| suite_job_runtime.test_jobs = test_job_runtimes |
| suite_job_runtime.hosts = set([job.host_id |
| for job in test_job_runtimes |
| if job.host_id != 0]) |
| suite_job_runtime.total_time = sum( |
| [job.get_total_time() for job in test_job_runtimes]) |
| suite_job_runtime.total_time_on_tasks = sum( |
| [job.get_time_on_tasks() for job in test_job_runtimes]) |
| suite_job_runtime.total_time_on_tasks_from_db = sum( |
| [job.get_time_on_tasks_from_db() for job in test_job_runtimes]) |
| suite_job_runtime.total_time_on_wait = sum( |
| [job.get_time_on_wait() for job in test_job_runtimes]) |
| suite_job_runtime.total_time_on_wait_from_db = sum( |
| [job.get_time_on_wait_from_db() for job in test_job_runtimes]) |
| suite_job_runtime.percent_on_tasks = 100*( |
| suite_job_runtime.total_time_on_tasks / |
| suite_job_runtime.total_time) |
| suite_job_runtime.percent_on_wait = 100*( |
| suite_job_runtime.total_time_on_wait / |
| suite_job_runtime.total_time) |
| suite_job_runtime.percent_on_tasks_from_db = 100*( |
| suite_job_runtime.total_time_on_tasks_from_db / |
| suite_job_runtime.total_time) |
| suite_job_runtime.percent_on_wait_from_db = 100*( |
| suite_job_runtime.total_time_on_wait_from_db / |
| suite_job_runtime.total_time) |
| all_jobs[suite_job_runtime] = {r.name:r for r in test_job_runtimes} |
| |
| delimiter = '\t' |
| # Print a row of suite job IDs. |
| print ('job ID%s' % delimiter + |
| delimiter.join([str(suite_job.id) |
| for suite_job in all_jobs.keys()])) |
| |
| # Print a row of platforms, e.g., lumpy-release. |
| print ('platform%s' % delimiter + |
| delimiter.join([suite_job.job.name.split('/')[0] |
| for suite_job in all_jobs.keys()])) |
| |
| # Print time to run suite |
| print ('time(mins)%s' % delimiter + |
| delimiter.join([str(suite_job.time_used.total_seconds()/60) |
| for suite_job in all_jobs.keys()])) |
| |
| # Print percent of time on tasks |
| print ('%% on special tasks%s' % delimiter + |
| delimiter.join([str(suite_job.percent_on_tasks) |
| for suite_job in all_jobs.keys()])) |
| |
| # Print percent of time on tasks based on time data from afe_special_tasks |
| print ('%% on special tasks with data from DB%s' % delimiter + |
| delimiter.join([str(suite_job.percent_on_tasks_from_db) |
| for suite_job in all_jobs.keys()])) |
| |
| # Print percent of time on tasks per task type |
| all_task_types = set() |
| for suite_job in all_jobs.keys(): |
| for job in suite_job.test_jobs: |
| all_task_types.update( |
| [task.task.task for task in job.get_all_tasks()]) |
| for task_type in all_task_types: |
| print 'Time data retrieved from debug log.' |
| time_data = [suite_job.get_time_per_task_type(task_type) |
| for suite_job in all_jobs.keys()] |
| print_task_data(all_jobs, time_data) |
| |
| print 'Time data retrieved from database' |
| time_data = [suite_job.get_time_per_task_type_from_db(task_type) |
| for suite_job in all_jobs.keys()] |
| print_task_data(all_jobs, time_data) |
| |
| count_string = delimiter.join( |
| [str(data[3]) if data else '_' for data in time_data]) |
| print ' %s count%s%s' % (task_type, delimiter, count_string) |
| |
| # Print percent of time on wait |
| print ('%% on wait%s' % delimiter + |
| delimiter.join([str(suite_job.percent_on_wait) |
| for suite_job in all_jobs.keys()])) |
| |
| # Print percent of time on wait based on special task time data from |
| # database |
| print ('%% on wait based on time data from db%s' % delimiter + |
| delimiter.join([str(suite_job.percent_on_wait_from_db) |
| for suite_job in all_jobs.keys()])) |
| |
| # Print the number of duts used for suite. |
| print ('dut #%s' % delimiter + |
| delimiter.join([str(len(suite_job.hosts)) |
| for suite_job in all_jobs.keys()])) |
| |
| for test_name in [job for job in all_jobs.values()[0].keys()]: |
| line = '%s%s' % (test_name, delimiter) |
| for suite_job in all_jobs.keys(): |
| test_job = all_jobs[suite_job].get(test_name) |
| if test_job: |
| line += (HYPERLINK % |
| (test_job.log, test_job.time_used.total_seconds()) |
| + delimiter) |
| else: |
| line += '_%s' % delimiter |
| print line |