blob: b5afb88700a9bedb2cf3e3377fac1d871214a7d9 [file] [log] [blame]
Dennis Jeffreyb95ba5a2012-11-12 17:55:18 -08001#!/usr/bin/env python
2# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6"""Extracts perf keys from autotest database and writes to local data files.
7
8This script keeps track of the job IDs for which perf keys have already been
9extracted on previous runs. The script only processes those job IDs whose perf
10values haven't yet been previously extracted.
11
12Sample usage:
13 python extract_perf.py -v
14
15Run with -h to see the full set of command-line options.
Dennis Jeffrey8a305382013-02-28 09:08:57 -080016
Dennis Jeffreyb95ba5a2012-11-12 17:55:18 -080017"""
18
19import datetime
20import logging
21import optparse
22import os
23import re
24import simplejson
25import sys
26import time
27
28import common
29from autotest_lib.client.common_lib import global_config
30from autotest_lib.server import frontend
31
32try:
33 from google.storage.speckle.python.api import rdbms_googleapi
34except ImportError:
35 # Download the AppEngine SDK if desired from here:
36 # https://developers.google.com/appengine/downloads
37 rdbms_googleapi = None
38
39try:
40 import MySQLdb
41except ImportError:
42 MySQLdb = None
43
44_GLOBAL_CONF = global_config.global_config
45_CONF_SECTION = 'AUTOTEST_WEB'
46
47_MYSQL_READONLY_LOGIN_CREDENTIALS = {
48 'host': _GLOBAL_CONF.get_config_value(_CONF_SECTION, 'readonly_host'),
49 'user': _GLOBAL_CONF.get_config_value(_CONF_SECTION, 'readonly_user'),
50 'passwd': _GLOBAL_CONF.get_config_value(_CONF_SECTION, 'readonly_password'),
51 'db': _GLOBAL_CONF.get_config_value(_CONF_SECTION, 'database'),
52}
53
Dennis Jeffreyb95ba5a2012-11-12 17:55:18 -080054_ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
55_CHART_CONFIG_FILE = os.path.join(_ROOT_DIR, 'croschart_defaults.json')
Dennis Jeffrey8a305382013-02-28 09:08:57 -080056_COMPLETED_ID_FILE_NAME = 'job_id_complete.txt'
57_CURR_PID_FILE_NAME = __file__ + '.curr_pid.txt'
Dennis Jeffreyb95ba5a2012-11-12 17:55:18 -080058
59# Values that can be configured through options.
60_NUM_DAYS_BACK = 7 # Ignore perf test runs in database that finished more than
61 # this many days ago.
Dennis Jeffrey8a305382013-02-28 09:08:57 -080062_DEST_DATA_DIR = _ROOT_DIR
Dennis Jeffreyb95ba5a2012-11-12 17:55:18 -080063
64# Other values that can only be configured here in the code.
65_AFE = frontend.AFE()
66_PLATFORMS = map(lambda x: x.name, _AFE.get_labels(platform=True))
67if 'snow' in _PLATFORMS:
68 _PLATFORMS.remove('snow')
69 _PLATFORMS.append('daisy')
70
71
72def get_job_ids(cursor, test_name, oldest_db_lookup_date, completed_ids):
73 """Gets all job IDs for the given test name that haven't yet been processed.
74
75 @param cursor: see query_database().
76 @param test_name: see query_database().
77 @param oldest_db_lookup_date: see query_database().
78 @param completed_ids: see query_database().
79
80 @return A list of string job IDs from the database that should be processed.
Dennis Jeffrey8a305382013-02-28 09:08:57 -080081
Dennis Jeffreyb95ba5a2012-11-12 17:55:18 -080082 """
83 query = ('SELECT DISTINCT afe_job_id '
84 'FROM tko_perf_view_2 INNER JOIN tko_jobs USING (job_idx) '
85 'INNER JOIN tko_status USING (status_idx) WHERE '
86 'test_name = %s AND test_finished_time >= %s AND '
87 'word != "RUNNING"')
88 start_time = time.time()
89 cursor.execute(query, (test_name, oldest_db_lookup_date))
90 logging.debug('Extracted job IDs in %.2f seconds', time.time() - start_time)
91 job_ids = []
92 for result_row in cursor:
93 job_id = str(result_row[0])
94 if job_id not in completed_ids:
95 job_ids.append(job_id)
96 return job_ids
97
98
Dennis Jeffrey8a305382013-02-28 09:08:57 -080099def write_perf_info_to_disk(job_id, result_dict, test_dir, output_dir):
Dennis Jeffreyb95ba5a2012-11-12 17:55:18 -0800100 """Writes extracted perf data for the given job ID to disk.
101
102 Also writes the job ID to disk to mark it as having been processed. Note
103 that the written files are not protected against simultaneous access by
104 multiple invocations of this script.
105
106 @param job_id: The string job ID.
107 @param result_dict: A dictionary of associated perf info to write to disk.
108 @param test_dir: The string directory name in which to write perf data.
Dennis Jeffrey8a305382013-02-28 09:08:57 -0800109 @param output_dir: The output directory in which results are being written.
110
Dennis Jeffreyb95ba5a2012-11-12 17:55:18 -0800111 """
Dennis Jeffrey9fece302013-03-26 12:08:06 -0700112 result_out = [job_id, result_dict['job_name'], result_dict['platform'],
113 result_dict['chrome_version']]
Dennis Jeffreyb95ba5a2012-11-12 17:55:18 -0800114 perf_items = []
115 for perf_key in result_dict['perf_keys']:
116 for perf_val in result_dict['perf_keys'][perf_key]:
117 perf_items.append((perf_key, perf_val))
118 result_out.append(perf_items)
119 file_name = os.path.join(test_dir, result_dict['platform'] + '.txt')
120 with open(file_name, 'a') as fp:
121 fp.write(simplejson.dumps(result_out) + '\n')
122
Dennis Jeffrey8a305382013-02-28 09:08:57 -0800123 with open(os.path.join(output_dir, _COMPLETED_ID_FILE_NAME), 'a') as fp:
Dennis Jeffreyb95ba5a2012-11-12 17:55:18 -0800124 fp.write(job_id + '\n')
125
126
Dennis Jeffrey8a305382013-02-28 09:08:57 -0800127def extract_perf_for_job_id(cursor, job_id, unexpected_job_names, test_dir,
128 output_dir):
Dennis Jeffreyb95ba5a2012-11-12 17:55:18 -0800129 """Extracts perf data for a given job, then writes to local text files.
130
131 @param cursor: A MySQLdb.cursor object used for interacting with a database.
132 @param job_id: The string job ID to process.
133 @param unexpected_job_names: A set of job names encountered so far that are
134 not associated with a known platform type.
135 @param test_dir: The string directory name in which to write perf data.
Dennis Jeffrey8a305382013-02-28 09:08:57 -0800136 @param output_dir: The output directory in which results are being written.
Dennis Jeffreyb95ba5a2012-11-12 17:55:18 -0800137
138 @return True, if data for the specified job ID is written to disk, or
139 False if not (will be False if the job ID is not associated with a known
140 platform type).
Dennis Jeffrey8a305382013-02-28 09:08:57 -0800141
Dennis Jeffreyb95ba5a2012-11-12 17:55:18 -0800142 """
143 query = ('SELECT job_name,iteration_key,iteration_value '
144 'FROM tko_perf_view_2 INNER JOIN tko_jobs USING (job_idx) '
145 'WHERE afe_job_id = %s')
146 cursor.execute(query, job_id)
147
148 result = {}
149 for job_name, key, val in cursor:
150 # The job_name string contains the platform name. The platform name is
151 # always followed by either "-rX", where X is the milestone number
152 # (this is from legacy data in the database), or else it is followed
153 # by "-release" (for more recent data in the database). We do not
154 # consider jobs in which the platform name is followed by anything
155 # else (in particular, "-paladin" runs).
156 #
157 # TODO(dennisjeffrey): Simplify the below code once the following bug
158 # is addressed to standardize the platform names: crosbug.com/38521.
159 match = re.search('(\w+)-r', job_name)
Dennis Jeffreybbe98a22013-02-13 09:59:38 -0800160 if not match:
161 unexpected_job_names.add(job_name)
162 continue
Dennis Jeffreyb95ba5a2012-11-12 17:55:18 -0800163 # Only process jobs for known platforms.
164 platform = match.group(1) if match.group(1) in _PLATFORMS else None
165 if platform:
166 result['job_name'] = job_name
167 result['platform'] = platform
168 result.setdefault('perf_keys', {})
169 if key and val:
170 result['perf_keys'].setdefault(key, [])
171 result['perf_keys'][key].append(val)
172 else:
173 unexpected_job_names.add(job_name)
174
175 if 'platform' not in result:
176 return False
177
Dennis Jeffrey9fece302013-03-26 12:08:06 -0700178 # Get the Chrome version number associated with this job ID.
179 query = ('SELECT DISTINCT value FROM tko_test_attributes '
180 'INNER JOIN tko_perf_view_2 USING (test_idx) '
181 'INNER JOIN tko_jobs USING (job_idx) '
182 'WHERE afe_job_id=%s AND attribute="CHROME_VERSION"')
183 cursor.execute(query, job_id)
184 cursor_results = cursor.fetchall()
185 assert len(cursor_results) <= 1, \
186 'Expected the Chrome version number to be unique for afe_job_id ' \
187 '%s, but got multiple instead: %s' % (job_id, cursor_results)
188 result['chrome_version'] = cursor_results[0][0] if cursor_results else ''
189
Dennis Jeffrey8a305382013-02-28 09:08:57 -0800190 write_perf_info_to_disk(job_id, result, test_dir, output_dir)
Dennis Jeffreyb95ba5a2012-11-12 17:55:18 -0800191 return True
192
193
Dennis Jeffrey8a305382013-02-28 09:08:57 -0800194def query_database(cursor, test_name, completed_ids, oldest_db_lookup_date,
195 output_dir):
Dennis Jeffreyb95ba5a2012-11-12 17:55:18 -0800196 """Queries database for perf values and stores them into local text files.
197
198 This function performs the work only for the specified test case.
199
200 @param cursor: A MySQLdb.cursor object used for interacting with a database.
201 @param test_name: The string name of a test case to process.
202 @param completed_ids: A set of job IDs that have already been previously
203 extracted from the database.
204 @param oldest_db_lookup_date: The oldest date (represented as a string) for
205 which we want to consider perf values in the database.
Dennis Jeffrey8a305382013-02-28 09:08:57 -0800206 @param output_dir: The output directory in which results are being written.
Dennis Jeffreyb95ba5a2012-11-12 17:55:18 -0800207
208 @return The number of new job IDs that have been extracted/processed.
Dennis Jeffrey8a305382013-02-28 09:08:57 -0800209
Dennis Jeffreyb95ba5a2012-11-12 17:55:18 -0800210 """
Dennis Jeffrey8a305382013-02-28 09:08:57 -0800211 test_dir = os.path.join(output_dir, test_name)
Dennis Jeffreyb95ba5a2012-11-12 17:55:18 -0800212 if not os.path.isdir(test_dir):
213 os.makedirs(test_dir)
214
215 # Identify the job IDs that need to be processed.
216 job_ids = get_job_ids(cursor, test_name, oldest_db_lookup_date,
217 completed_ids)
218
219 # For each job ID, extract the perf values we need.
220 unexpected_job_names = set()
221 num_newly_added = 0
222 for i, job_id in enumerate(job_ids):
223 logging.debug('Processing job %d of %d', i + 1, len(job_ids))
224
225 if extract_perf_for_job_id(cursor, job_id, unexpected_job_names,
Dennis Jeffrey8a305382013-02-28 09:08:57 -0800226 test_dir, output_dir):
Dennis Jeffreyb95ba5a2012-11-12 17:55:18 -0800227 completed_ids.add(job_id)
228 num_newly_added += 1
229
230 if unexpected_job_names:
231 logging.debug('Job names skipped due to unexpected platform: %s',
232 list(unexpected_job_names))
233
234 return num_newly_added
235
236
Dennis Jeffrey8a305382013-02-28 09:08:57 -0800237def extract_new_perf_data(cursor, output_dir, options):
Dennis Jeffreyb95ba5a2012-11-12 17:55:18 -0800238 """Extracts new perf data from database and writes data to local text files.
239
240 @param cursor: A MySQLdb.cursor object used for interacting with a database.
Dennis Jeffrey8a305382013-02-28 09:08:57 -0800241 @param output_dir: The output directory in which results are being written.
Dennis Jeffreyb95ba5a2012-11-12 17:55:18 -0800242 @param options: An optparse.OptionParser options object.
243
244 @return The number of new job IDs that have been extracted/processed.
Dennis Jeffrey8a305382013-02-28 09:08:57 -0800245
Dennis Jeffreyb95ba5a2012-11-12 17:55:18 -0800246 """
247 charts = {}
248 with open(_CHART_CONFIG_FILE, 'r') as fp:
249 charts = simplejson.loads(fp.read())
250
251 # Compute the oldest date for the perf values that we want to consider.
252 oldest_db_lookup_date = (
253 datetime.date.today() -
254 datetime.timedelta(days=options.num_days_back)).strftime('%Y-%m-%d')
255
256 logging.debug('Extracting job IDs from %s onwards.',
257 oldest_db_lookup_date)
258
259 # Get unique test names.
Dennis Jeffreybbe98a22013-02-13 09:59:38 -0800260 test_names = set()
261 for c in charts:
262 test_names.add(c['test_name'])
263 if 'old_test_names' in c:
264 test_names |= set(c['old_test_names'])
Dennis Jeffreyb95ba5a2012-11-12 17:55:18 -0800265
266 # Get list of already-completed job IDs so we don't re-fetch their data.
267 completed_ids = set()
Dennis Jeffrey8a305382013-02-28 09:08:57 -0800268 completed_id_file = os.path.join(output_dir, _COMPLETED_ID_FILE_NAME)
269 if os.path.isfile(completed_id_file):
270 with open(completed_id_file, 'r') as fp:
Dennis Jeffreyb95ba5a2012-11-12 17:55:18 -0800271 job_ids = map(lambda x: x.strip(), fp.readlines())
272 for job_id in job_ids:
273 completed_ids.add(job_id)
274
275 num_newly_added = 0
276 for i, test_name in enumerate(test_names):
277 logging.info('Extracting info for test %d of %d: %s ', i + 1,
278 len(test_names), test_name)
279
280 num_newly_added += query_database(cursor, test_name, completed_ids,
Dennis Jeffrey8a305382013-02-28 09:08:57 -0800281 oldest_db_lookup_date, output_dir)
Dennis Jeffreyb95ba5a2012-11-12 17:55:18 -0800282
283 return num_newly_added
284
285
Dennis Jeffrey8a305382013-02-28 09:08:57 -0800286def cleanup(output_dir):
287 """Cleans up when this script is done.
288
289 @param output_dir: The output directory in which results are being written.
290
291 """
292 curr_pid_file = os.path.join(output_dir, _CURR_PID_FILE_NAME)
293 if os.path.isfile(curr_pid_file):
294 os.remove(curr_pid_file)
Dennis Jeffreyb95ba5a2012-11-12 17:55:18 -0800295
296
297def main():
298 """Main function."""
299 parser = optparse.OptionParser()
300 parser.add_option('-n', '--num-days-back', metavar='NUM_DAYS', type='int',
301 default=_NUM_DAYS_BACK,
302 help='Consider only the perf test results that were '
303 'computed within this many days ago (if this script '
304 'is invoked daily, no need to consider history from '
305 'many days back). Defaults to %default days back.')
Dennis Jeffrey8a305382013-02-28 09:08:57 -0800306 parser.add_option('-o', '--output-dir', metavar='DIR', type='string',
307 default=_DEST_DATA_DIR,
308 help='Absolute path to the output directory in which to '
309 'store the raw perf data extracted from the '
310 'database. Will be written into a subfolder named '
311 '"data". Defaults to "%default".')
Dennis Jeffreyb95ba5a2012-11-12 17:55:18 -0800312 parser.add_option('-c', '--cloud-sql', action='store_true', default=False,
313 help='Connect to the chromeos-lab CloudSQL database, '
314 'rather than the original MySQL autotest database.')
315 parser.add_option('-v', '--verbose', action='store_true', default=False,
316 help='Use verbose logging.')
317 options, _ = parser.parse_args()
318
319 log_level = logging.DEBUG if options.verbose else logging.INFO
320 logging.basicConfig(format='%(asctime)s %(levelname)s: %(message)s',
321 level=log_level)
322
323 cursor = None
324 if options.cloud_sql:
325 # CloudSQL.
326 logging.debug('Connecting to CloudSQL...')
327 if rdbms_googleapi is None:
328 logging.error('CloudSQL requested, but cannot locate CloudSQL '
329 'dependencies. Have you set up CloudSQL on this '
330 'machine?')
331 sys.exit(1)
332 db = rdbms_googleapi.connect(None,
333 instance='chromeos-bot:chromeos-lab')
334 cursor = db.cursor()
335 cursor.execute('USE chromeos_autotest_db')
336 else:
337 # Autotest MySQL database.
338 logging.debug('Connecting to Autotest MySQL database...')
339 if MySQLdb is None:
340 logging.error('MySQL requested, but cannot locate MySQL '
341 'dependencies. Have you set up MySQL on this '
342 'machine?')
343 sys.exit(1)
344 db = MySQLdb.connect(**_MYSQL_READONLY_LOGIN_CREDENTIALS)
345 cursor = db.cursor()
346
347 logging.debug('Database connection complete.')
348
Dennis Jeffrey8a305382013-02-28 09:08:57 -0800349 output_dir = os.path.join(options.output_dir, 'data')
350 if not os.path.isdir(output_dir):
351 os.makedirs(output_dir)
Dennis Jeffreyb95ba5a2012-11-12 17:55:18 -0800352
Dennis Jeffrey8a305382013-02-28 09:08:57 -0800353 common.die_if_already_running(
354 os.path.join(output_dir, _CURR_PID_FILE_NAME), logging)
355 num_newly_added = extract_new_perf_data(cursor, output_dir, options)
356 cleanup(output_dir)
Dennis Jeffreyb95ba5a2012-11-12 17:55:18 -0800357 logging.info('Done! Added info for %d new job IDs', num_newly_added)
358
359
360if __name__ == '__main__':
361 main()