#!/usr/bin/env python
# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Extracts perf keys from autotest database and writes to local data files.

This script keeps track of the job IDs for which perf keys have already been
extracted on previous runs. The script only processes those job IDs whose perf
values haven't yet been previously extracted.

Sample usage:
    python extract_perf.py -v

Run with -h to see the full set of command-line options.
"""
17
18import datetime
19import logging
20import optparse
21import os
22import re
23import simplejson
24import sys
25import time
26
27import common
28from autotest_lib.client.common_lib import global_config
29from autotest_lib.server import frontend
30
31try:
32 from google.storage.speckle.python.api import rdbms_googleapi
33except ImportError:
34 # Download the AppEngine SDK if desired from here:
35 # https://developers.google.com/appengine/downloads
36 rdbms_googleapi = None
37
38try:
39 import MySQLdb
40except ImportError:
41 MySQLdb = None
42
43_GLOBAL_CONF = global_config.global_config
44_CONF_SECTION = 'AUTOTEST_WEB'
45
46_MYSQL_READONLY_LOGIN_CREDENTIALS = {
47 'host': _GLOBAL_CONF.get_config_value(_CONF_SECTION, 'readonly_host'),
48 'user': _GLOBAL_CONF.get_config_value(_CONF_SECTION, 'readonly_user'),
49 'passwd': _GLOBAL_CONF.get_config_value(_CONF_SECTION, 'readonly_password'),
50 'db': _GLOBAL_CONF.get_config_value(_CONF_SECTION, 'database'),
51}
52
53# Paths to files.
54_ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
55_CHART_CONFIG_FILE = os.path.join(_ROOT_DIR, 'croschart_defaults.json')
56_DEST_DATA_DIR = os.path.join(_ROOT_DIR, 'data')
57_COMPLETED_ID_FILE = os.path.join(_DEST_DATA_DIR, 'job_id_complete.txt')
58_CURR_PID_FILE = os.path.join(_DEST_DATA_DIR, __file__ + '.curr_pid.txt')
59
60# Values that can be configured through options.
61_NUM_DAYS_BACK = 7 # Ignore perf test runs in database that finished more than
62 # this many days ago.
63
64# Other values that can only be configured here in the code.
65_AFE = frontend.AFE()
66_PLATFORMS = map(lambda x: x.name, _AFE.get_labels(platform=True))
67if 'snow' in _PLATFORMS:
68 _PLATFORMS.remove('snow')
69 _PLATFORMS.append('daisy')
70
71
def get_job_ids(cursor, test_name, oldest_db_lookup_date, completed_ids):
    """Gets all job IDs for the given test name that haven't yet been processed.

    @param cursor: see query_database().
    @param test_name: see query_database().
    @param oldest_db_lookup_date: see query_database().
    @param completed_ids: see query_database().

    @return A list of string job IDs from the database that should be processed.
    """
    query = ('SELECT DISTINCT afe_job_id '
             'FROM tko_perf_view_2 INNER JOIN tko_jobs USING (job_idx) '
             'INNER JOIN tko_status USING (status_idx) WHERE '
             'test_name = %s AND test_finished_time >= %s AND '
             'word != "RUNNING"')
    query_start = time.time()
    cursor.execute(query, (test_name, oldest_db_lookup_date))
    logging.debug('Extracted job IDs in %.2f seconds',
                  time.time() - query_start)
    # Keep only the job IDs that a previous run has not already handled.
    return [job_id for job_id in (str(row[0]) for row in cursor)
            if job_id not in completed_ids]
96
97
def write_perf_info_to_disk(job_id, result_dict, test_dir):
    """Writes extracted perf data for the given job ID to disk.

    Also writes the job ID to disk to mark it as having been processed. Note
    that the written files are not protected against simultaneous access by
    multiple invocations of this script.

    @param job_id: The string job ID.
    @param result_dict: A dictionary of associated perf info to write to disk.
    @param test_dir: The string directory name in which to write perf data.
    """
    # Flatten the {perf_key: [values]} mapping into (key, value) pairs.
    perf_items = [(perf_key, perf_val)
                  for perf_key in result_dict['perf_keys']
                  for perf_val in result_dict['perf_keys'][perf_key]]
    record = [job_id, result_dict['job_name'], result_dict['platform'],
              perf_items]
    # One JSON record per line, appended to a per-platform data file.
    out_file = os.path.join(test_dir, result_dict['platform'] + '.txt')
    with open(out_file, 'a') as fp:
        fp.write(simplejson.dumps(record) + '\n')

    # Mark this job ID as processed so future runs skip it.
    with open(_COMPLETED_ID_FILE, 'a') as fp:
        fp.write(job_id + '\n')
121
122
def extract_perf_for_job_id(cursor, job_id, unexpected_job_names, test_dir):
    """Extracts perf data for a given job, then writes to local text files.

    @param cursor: A MySQLdb.cursor object used for interacting with a database.
    @param job_id: The string job ID to process.
    @param unexpected_job_names: A set of job names encountered so far that are
        not associated with a known platform type.
    @param test_dir: The string directory name in which to write perf data.

    @return True, if data for the specified job ID is written to disk, or
        False if not (will be False if the job ID is not associated with a known
        platform type).
    """
    query = ('SELECT job_name,iteration_key,iteration_value '
             'FROM tko_perf_view_2 INNER JOIN tko_jobs USING (job_idx) '
             'WHERE afe_job_id = %s')
    # DB-API 2.0 expects the query parameters as a sequence, not a bare value.
    cursor.execute(query, (job_id,))

    result = {}
    for job_name, key, val in cursor:
        # The job_name string contains the platform name. The platform name is
        # always followed by either "-rX", where X is the milestone number
        # (this is from legacy data in the database), or else it is followed
        # by "-release" (for more recent data in the database). We do not
        # consider jobs in which the platform name is followed by anything
        # else (in particular, "-paladin" runs).
        #
        # TODO(dennisjeffrey): Simplify the below code once the following bug
        # is addressed to standardize the platform names: crosbug.com/38521.
        match = re.search(r'(\w+)-r', job_name)
        # Only process jobs for known platforms. Guard against job names that
        # don't match the pattern at all: previously match.group(1) raised an
        # AttributeError when re.search() returned None.
        platform = None
        if match and match.group(1) in _PLATFORMS:
            platform = match.group(1)
        if platform:
            result['job_name'] = job_name
            result['platform'] = platform
            perf_keys = result.setdefault('perf_keys', {})
            # Skip rows with NULL/empty keys or values.
            if key and val:
                perf_keys.setdefault(key, []).append(val)
        else:
            unexpected_job_names.add(job_name)

    if 'platform' not in result:
        # No row matched a known platform; nothing to write for this job.
        return False

    write_perf_info_to_disk(job_id, result, test_dir)
    return True
170
171
def query_database(cursor, test_name, completed_ids, oldest_db_lookup_date):
    """Queries database for perf values and stores them into local text files.

    This function performs the work only for the specified test case.

    @param cursor: A MySQLdb.cursor object used for interacting with a database.
    @param test_name: The string name of a test case to process.
    @param completed_ids: A set of job IDs that have already been previously
        extracted from the database.
    @param oldest_db_lookup_date: The oldest date (represented as a string) for
        which we want to consider perf values in the database.

    @return The number of new job IDs that have been extracted/processed.
    """
    # Each test gets its own subdirectory of perf data files.
    test_dir = os.path.join(_DEST_DATA_DIR, test_name)
    if not os.path.isdir(test_dir):
        os.makedirs(test_dir)

    # Identify the job IDs that still need to be processed.
    pending_ids = get_job_ids(cursor, test_name, oldest_db_lookup_date,
                              completed_ids)

    unexpected_job_names = set()
    num_newly_added = 0
    total = len(pending_ids)
    for index, job_id in enumerate(pending_ids, start=1):
        logging.debug('Processing job %d of %d', index, total)
        extracted = extract_perf_for_job_id(cursor, job_id,
                                            unexpected_job_names, test_dir)
        if extracted:
            completed_ids.add(job_id)
            num_newly_added += 1

    if unexpected_job_names:
        logging.debug('Job names skipped due to unexpected platform: %s',
                      list(unexpected_job_names))

    return num_newly_added
210
211
def extract_new_perf_data(cursor, options):
    """Extracts new perf data from database and writes data to local text files.

    @param cursor: A MySQLdb.cursor object used for interacting with a database.
    @param options: An optparse.OptionParser options object.

    @return The number of new job IDs that have been extracted/processed.
    """
    with open(_CHART_CONFIG_FILE, 'r') as fp:
        charts = simplejson.loads(fp.read())

    # Compute the oldest date for the perf values that we want to consider.
    cutoff = (datetime.date.today() -
              datetime.timedelta(days=options.num_days_back))
    oldest_db_lookup_date = cutoff.strftime('%Y-%m-%d')

    logging.debug('Extracting job IDs from %s onwards.',
                  oldest_db_lookup_date)

    # Unique test names across all configured charts.
    test_names = set(chart['test_name'] for chart in charts)

    # Job IDs already handled by previous runs, so we don't re-fetch them.
    completed_ids = set()
    if os.path.isfile(_COMPLETED_ID_FILE):
        with open(_COMPLETED_ID_FILE, 'r') as fp:
            completed_ids = set(line.strip() for line in fp.readlines())

    num_newly_added = 0
    for i, test_name in enumerate(test_names):
        logging.info('Extracting info for test %d of %d: %s ', i + 1,
                     len(test_names), test_name)
        num_newly_added += query_database(cursor, test_name, completed_ids,
                                          oldest_db_lookup_date)

    return num_newly_added
252
253
def cleanup():
    """Cleans up when this script is done."""
    # Remove the PID file so future invocations know this run has finished.
    if not os.path.isfile(_CURR_PID_FILE):
        return
    os.remove(_CURR_PID_FILE)
258
259
def _parse_options():
    """Builds the command-line option parser and parses sys.argv.

    @return The parsed optparse options object.
    """
    parser = optparse.OptionParser()
    parser.add_option('-n', '--num-days-back', metavar='NUM_DAYS', type='int',
                      default=_NUM_DAYS_BACK,
                      help='Consider only the perf test results that were '
                           'computed within this many days ago (if this script '
                           'is invoked daily, no need to consider history from '
                           'many days back). Defaults to %default days back.')
    parser.add_option('-c', '--cloud-sql', action='store_true', default=False,
                      help='Connect to the chromeos-lab CloudSQL database, '
                           'rather than the original MySQL autotest database.')
    parser.add_option('-v', '--verbose', action='store_true', default=False,
                      help='Use verbose logging.')
    return parser.parse_args()[0]


def _connect_to_database(use_cloud_sql):
    """Opens a database connection and returns a cursor for it.

    Exits the process with status 1 if the requested database's client
    dependencies are not available on this machine.

    @param use_cloud_sql: True to connect to CloudSQL, False for the
        autotest MySQL database.

    @return A DB-API cursor connected to the requested database.
    """
    if use_cloud_sql:
        logging.debug('Connecting to CloudSQL...')
        if rdbms_googleapi is None:
            logging.error('CloudSQL requested, but cannot locate CloudSQL '
                          'dependencies. Have you set up CloudSQL on this '
                          'machine?')
            sys.exit(1)
        db = rdbms_googleapi.connect(None,
                                     instance='chromeos-bot:chromeos-lab')
        cursor = db.cursor()
        cursor.execute('USE chromeos_autotest_db')
        return cursor
    logging.debug('Connecting to Autotest MySQL database...')
    if MySQLdb is None:
        logging.error('MySQL requested, but cannot locate MySQL '
                      'dependencies. Have you set up MySQL on this '
                      'machine?')
        sys.exit(1)
    db = MySQLdb.connect(**_MYSQL_READONLY_LOGIN_CREDENTIALS)
    return db.cursor()


def main():
    """Main function."""
    options = _parse_options()

    logging.basicConfig(
        format='%(asctime)s %(levelname)s: %(message)s',
        level=logging.DEBUG if options.verbose else logging.INFO)

    cursor = _connect_to_database(options.cloud_sql)
    logging.debug('Database connection complete.')

    if not os.path.isdir(_DEST_DATA_DIR):
        os.makedirs(_DEST_DATA_DIR)

    # Ensure only one instance of this script runs at a time.
    common.die_if_already_running(_CURR_PID_FILE, logging)
    num_newly_added = extract_new_perf_data(cursor, options)
    cleanup()
    logging.info('Done! Added info for %d new job IDs', num_newly_added)
313
314
# Script entry point.
if __name__ == '__main__':
    main()