#!/usr/bin/env python
# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Extracts perf keys from the autotest database into local data files.

This script keeps track of the job IDs whose perf keys have already been
extracted on previous runs, and only processes job IDs whose perf values
haven't been extracted yet.

Sample usage:
    python extract_perf.py -v

Run with -h to see the full set of command-line options.
"""

import datetime
import logging
import optparse
import os
import re
import simplejson
import sys
import time

import common
from autotest_lib.client.common_lib import global_config
from autotest_lib.server import frontend

try:
    from google.storage.speckle.python.api import rdbms_googleapi
except ImportError:
    # If needed, the App Engine SDK can be downloaded from:
    # https://developers.google.com/appengine/downloads
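    # Leaving the module as None lets the MySQL-only path keep working;
    # main() reports an error only if --cloud-sql is actually requested.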
    rdbms_googleapi = None

try:
    import MySQLdb
except ImportError:
    MySQLdb = None

_GLOBAL_CONF = global_config.global_config
_CONF_SECTION = 'AUTOTEST_WEB'

_MYSQL_READONLY_LOGIN_CREDENTIALS = {
    'host': _GLOBAL_CONF.get_config_value(_CONF_SECTION, 'readonly_host'),
    'user': _GLOBAL_CONF.get_config_value(_CONF_SECTION, 'readonly_user'),
    'passwd': _GLOBAL_CONF.get_config_value(_CONF_SECTION, 'readonly_password'),
    'db': _GLOBAL_CONF.get_config_value(_CONF_SECTION, 'database'),
}

# Paths to files.
_ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
_CHART_CONFIG_FILE = os.path.join(_ROOT_DIR, 'croschart_defaults.json')
_DEST_DATA_DIR = os.path.join(_ROOT_DIR, 'data')
_COMPLETED_ID_FILE = os.path.join(_DEST_DATA_DIR, 'job_id_complete.txt')
# Use basename() so an absolute __file__ doesn't silently override
# _DEST_DATA_DIR in the join.
_CURR_PID_FILE = os.path.join(
    _DEST_DATA_DIR, os.path.basename(__file__) + '.curr_pid.txt')
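
# Data layout: perf values for each (test, platform) pair are appended to
# data/<test_name>/<platform>.txt, one JSON line per job of the form
#   [job_id, job_name, platform, [[perf_key, perf_value], ...]]
# and _COMPLETED_ID_FILE accumulates one processed job ID per line.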

# Values that can be configured through options.
_NUM_DAYS_BACK = 7  # Ignore perf test runs in the database that finished
                    # more than this many days ago.

# Other values that can only be configured here in the code.
_AFE = frontend.AFE()
_PLATFORMS = [label.name for label in _AFE.get_labels(platform=True)]
# Job names report 'snow' devices under the 'daisy' build name, so track
# that platform as 'daisy'.
if 'snow' in _PLATFORMS:
    _PLATFORMS.remove('snow')
    _PLATFORMS.append('daisy')


def get_job_ids(cursor, test_name, oldest_db_lookup_date, completed_ids):
    """Gets all job IDs for the given test that haven't yet been processed.

    @param cursor: see query_database().
    @param test_name: see query_database().
    @param oldest_db_lookup_date: see query_database().
    @param completed_ids: see query_database().

    @return A list of string job IDs from the database that should be
        processed.
    """
    query = ('SELECT DISTINCT afe_job_id '
             'FROM tko_perf_view_2 INNER JOIN tko_jobs USING (job_idx) '
             'INNER JOIN tko_status USING (status_idx) WHERE '
             'test_name = %s AND test_finished_time >= %s AND '
             'word != "RUNNING"')
    start_time = time.time()
    cursor.execute(query, (test_name, oldest_db_lookup_date))
    logging.debug('Extracted job IDs in %.2f seconds',
                  time.time() - start_time)
    job_ids = []
    for result_row in cursor:
        job_id = str(result_row[0])
        if job_id not in completed_ids:
            job_ids.append(job_id)
    return job_ids


def write_perf_info_to_disk(job_id, result_dict, test_dir):
    """Writes extracted perf data for the given job ID to disk.

    Also writes the job ID to disk to mark it as having been processed. Note
    that the written files are not protected against simultaneous access by
    multiple invocations of this script.

    @param job_id: The string job ID.
    @param result_dict: A dictionary of associated perf info to write to disk.
    @param test_dir: The string directory name in which to write perf data.
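
    Example of one line this appends (all values hypothetical):
        ["1234", "lumpy-release/R26-3520.0.0/bvt/platform_BootPerf", "lumpy",
         [["seconds_kernel_to_login", 8.4]]]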
    """
    result_out = [job_id, result_dict['job_name'], result_dict['platform']]
    perf_items = []
    for perf_key in result_dict['perf_keys']:
        for perf_val in result_dict['perf_keys'][perf_key]:
            perf_items.append((perf_key, perf_val))
    result_out.append(perf_items)
    file_name = os.path.join(test_dir, result_dict['platform'] + '.txt')
    with open(file_name, 'a') as fp:
        fp.write(simplejson.dumps(result_out) + '\n')

    with open(_COMPLETED_ID_FILE, 'a') as fp:
        fp.write(job_id + '\n')


def extract_perf_for_job_id(cursor, job_id, unexpected_job_names, test_dir):
    """Extracts perf data for a given job, then writes it to local text files.

    @param cursor: A MySQLdb.cursor object used for interacting with a
        database.
    @param job_id: The string job ID to process.
    @param unexpected_job_names: A set of job names encountered so far that are
        not associated with a known platform type.
    @param test_dir: The string directory name in which to write perf data.

    @return True if data for the specified job ID is written to disk, or
        False if not (False means the job ID is not associated with a known
        platform type).
    """
    query = ('SELECT job_name,iteration_key,iteration_value '
             'FROM tko_perf_view_2 INNER JOIN tko_jobs USING (job_idx) '
             'WHERE afe_job_id = %s')
    cursor.execute(query, (job_id,))

    result = {}
    for job_name, key, val in cursor:
        # The job_name string contains the platform name. The platform name
        # is always followed either by "-rX", where X is the milestone number
        # (legacy data in the database), or by "-release" (more recent data).
        # Jobs in which the platform name is followed by anything else (in
        # particular, "-paladin" runs) are not considered.
        #
        # TODO(dennisjeffrey): Simplify the code below once the following bug
        # is addressed to standardize the platform names: crosbug.com/38521.
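        # For example, 'lumpy-r26' and 'lumpy-release' both yield platform
        # 'lumpy', while 'lumpy-paladin' contains no '-r' and is skipped.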
        match = re.search(r'(\w+)-r', job_name)
        if not match:
            unexpected_job_names.add(job_name)
            continue
        # Only process jobs for known platforms.
        platform = match.group(1) if match.group(1) in _PLATFORMS else None
        if platform:
            result['job_name'] = job_name
            result['platform'] = platform
            result.setdefault('perf_keys', {})
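            # A perf key may recur across test iterations; accumulate every
            # value for the key rather than keeping only the first.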
            if key and val:
                result['perf_keys'].setdefault(key, [])
                result['perf_keys'][key].append(val)
        else:
            unexpected_job_names.add(job_name)

    if 'platform' not in result:
        return False

    write_perf_info_to_disk(job_id, result, test_dir)
    return True


def query_database(cursor, test_name, completed_ids, oldest_db_lookup_date):
    """Queries the database for perf values and stores them in local files.

    This function performs the work only for the specified test case.

    @param cursor: A MySQLdb.cursor object used for interacting with a
        database.
    @param test_name: The string name of a test case to process.
    @param completed_ids: A set of job IDs that have already been extracted
        from the database on previous runs.
    @param oldest_db_lookup_date: The oldest date (represented as a string)
        for which to consider perf values in the database.

    @return The number of new job IDs that have been extracted/processed.
    """
    test_dir = os.path.join(_DEST_DATA_DIR, test_name)
    if not os.path.isdir(test_dir):
        os.makedirs(test_dir)

    # Identify the job IDs that need to be processed.
    job_ids = get_job_ids(cursor, test_name, oldest_db_lookup_date,
                          completed_ids)

    # For each job ID, extract the perf values we need.
    unexpected_job_names = set()
    num_newly_added = 0
    for i, job_id in enumerate(job_ids):
        logging.debug('Processing job %d of %d', i + 1, len(job_ids))

        if extract_perf_for_job_id(cursor, job_id, unexpected_job_names,
                                   test_dir):
            completed_ids.add(job_id)
            num_newly_added += 1

    if unexpected_job_names:
        logging.debug('Job names skipped due to unexpected platform: %s',
                      list(unexpected_job_names))

    return num_newly_added


def extract_new_perf_data(cursor, options):
    """Extracts new perf data from the database into local text files.

    @param cursor: A MySQLdb.cursor object used for interacting with a
        database.
    @param options: An optparse.OptionParser options object.

    @return The number of new job IDs that have been extracted/processed.
    """
    with open(_CHART_CONFIG_FILE, 'r') as fp:
        charts = simplejson.loads(fp.read())

    # Compute the oldest date for the perf values that we want to consider.
    oldest_db_lookup_date = (
        datetime.date.today() -
        datetime.timedelta(days=options.num_days_back)).strftime('%Y-%m-%d')
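    # For example, with num_days_back=7 and a current date of 2013-02-13,
    # this yields '2013-02-06'.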

    logging.debug('Extracting job IDs from %s onwards.',
                  oldest_db_lookup_date)

    # Get unique test names.
    test_names = set()
    for c in charts:
        test_names.add(c['test_name'])
        if 'old_test_names' in c:
            test_names |= set(c['old_test_names'])

    # Get the set of already-completed job IDs so we don't re-fetch their
    # data.
    completed_ids = set()
    if os.path.isfile(_COMPLETED_ID_FILE):
        with open(_COMPLETED_ID_FILE, 'r') as fp:
            completed_ids = set(line.strip() for line in fp)

    num_newly_added = 0
    for i, test_name in enumerate(test_names):
        logging.info('Extracting info for test %d of %d: %s', i + 1,
                     len(test_names), test_name)

        num_newly_added += query_database(cursor, test_name, completed_ids,
                                          oldest_db_lookup_date)

    return num_newly_added


def cleanup():
    """Cleans up when this script is done."""
    if os.path.isfile(_CURR_PID_FILE):
        os.remove(_CURR_PID_FILE)


def main():
    """Main function."""
    parser = optparse.OptionParser()
    parser.add_option('-n', '--num-days-back', metavar='NUM_DAYS', type='int',
                      default=_NUM_DAYS_BACK,
                      help='Consider only perf test results computed within '
                           'this many days before now (if this script is '
                           'invoked daily, there is no need to consider '
                           'history from many days back). Defaults to '
                           '%default days back.')
    parser.add_option('-c', '--cloud-sql', action='store_true', default=False,
                      help='Connect to the chromeos-lab CloudSQL database, '
                           'rather than the original MySQL autotest '
                           'database.')
    parser.add_option('-v', '--verbose', action='store_true', default=False,
                      help='Use verbose logging.')
    options, _ = parser.parse_args()

    log_level = logging.DEBUG if options.verbose else logging.INFO
    logging.basicConfig(format='%(asctime)s %(levelname)s: %(message)s',
                        level=log_level)

    cursor = None
    if options.cloud_sql:
        # CloudSQL.
        logging.debug('Connecting to CloudSQL...')
        if rdbms_googleapi is None:
            logging.error('CloudSQL requested, but cannot locate CloudSQL '
                          'dependencies. Have you set up CloudSQL on this '
                          'machine?')
            sys.exit(1)
        db = rdbms_googleapi.connect(None,
                                     instance='chromeos-bot:chromeos-lab')
        cursor = db.cursor()
        cursor.execute('USE chromeos_autotest_db')
    else:
        # Autotest MySQL database.
        logging.debug('Connecting to Autotest MySQL database...')
        if MySQLdb is None:
            logging.error('MySQL requested, but cannot locate MySQL '
                          'dependencies. Have you set up MySQL on this '
                          'machine?')
            sys.exit(1)
        db = MySQLdb.connect(**_MYSQL_READONLY_LOGIN_CREDENTIALS)
        cursor = db.cursor()

    logging.debug('Database connection complete.')

    if not os.path.isdir(_DEST_DATA_DIR):
        os.makedirs(_DEST_DATA_DIR)

    common.die_if_already_running(_CURR_PID_FILE, logging)
    num_newly_added = extract_new_perf_data(cursor, options)
    cleanup()
    logging.info('Done! Added info for %d new job IDs', num_newly_added)


if __name__ == '__main__':
    main()