blob: 93b62baaab6b09d294956604da2fbea218a8c95a [file] [log] [blame]
Dennis Jeffreyb95ba5a2012-11-12 17:55:18 -08001#!/usr/bin/env python
2# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
3# Use of this source code is governed by a BSD-style license that can be
4# found in the LICENSE file.
5
6"""Extracts perf keys from autotest database and writes to local data files.
7
8This script keeps track of the job IDs for which perf keys have already been
9extracted on previous runs. The script only processes those job IDs whose perf
10values haven't yet been previously extracted.
11
12Sample usage:
13 python extract_perf.py -v
14
15Run with -h to see the full set of command-line options.
Dennis Jeffrey8a305382013-02-28 09:08:57 -080016
Dennis Jeffreyb95ba5a2012-11-12 17:55:18 -080017"""
18
19import datetime
20import logging
21import optparse
22import os
23import re
24import simplejson
25import sys
26import time
27
28import common
29from autotest_lib.client.common_lib import global_config
30from autotest_lib.server import frontend
31
32try:
33 from google.storage.speckle.python.api import rdbms_googleapi
34except ImportError:
35 # Download the AppEngine SDK if desired from here:
36 # https://developers.google.com/appengine/downloads
37 rdbms_googleapi = None
38
39try:
40 import MySQLdb
41except ImportError:
42 MySQLdb = None
43
44_GLOBAL_CONF = global_config.global_config
45_CONF_SECTION = 'AUTOTEST_WEB'
46
47_MYSQL_READONLY_LOGIN_CREDENTIALS = {
48 'host': _GLOBAL_CONF.get_config_value(_CONF_SECTION, 'readonly_host'),
49 'user': _GLOBAL_CONF.get_config_value(_CONF_SECTION, 'readonly_user'),
50 'passwd': _GLOBAL_CONF.get_config_value(_CONF_SECTION, 'readonly_password'),
51 'db': _GLOBAL_CONF.get_config_value(_CONF_SECTION, 'database'),
52}
53
Dennis Jeffreyb95ba5a2012-11-12 17:55:18 -080054_ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
55_CHART_CONFIG_FILE = os.path.join(_ROOT_DIR, 'croschart_defaults.json')
Dennis Jeffrey8a305382013-02-28 09:08:57 -080056_COMPLETED_ID_FILE_NAME = 'job_id_complete.txt'
57_CURR_PID_FILE_NAME = __file__ + '.curr_pid.txt'
Dennis Jeffreyb95ba5a2012-11-12 17:55:18 -080058
59# Values that can be configured through options.
60_NUM_DAYS_BACK = 7 # Ignore perf test runs in database that finished more than
61 # this many days ago.
Dennis Jeffrey8a305382013-02-28 09:08:57 -080062_DEST_DATA_DIR = _ROOT_DIR
Dennis Jeffreyb95ba5a2012-11-12 17:55:18 -080063
64# Other values that can only be configured here in the code.
65_AFE = frontend.AFE()
66_PLATFORMS = map(lambda x: x.name, _AFE.get_labels(platform=True))
67if 'snow' in _PLATFORMS:
68 _PLATFORMS.remove('snow')
69 _PLATFORMS.append('daisy')
70
71
72def get_job_ids(cursor, test_name, oldest_db_lookup_date, completed_ids):
73 """Gets all job IDs for the given test name that haven't yet been processed.
74
75 @param cursor: see query_database().
76 @param test_name: see query_database().
77 @param oldest_db_lookup_date: see query_database().
78 @param completed_ids: see query_database().
79
80 @return A list of string job IDs from the database that should be processed.
Dennis Jeffrey8a305382013-02-28 09:08:57 -080081
Dennis Jeffreyb95ba5a2012-11-12 17:55:18 -080082 """
83 query = ('SELECT DISTINCT afe_job_id '
84 'FROM tko_perf_view_2 INNER JOIN tko_jobs USING (job_idx) '
85 'INNER JOIN tko_status USING (status_idx) WHERE '
86 'test_name = %s AND test_finished_time >= %s AND '
87 'word != "RUNNING"')
88 start_time = time.time()
89 cursor.execute(query, (test_name, oldest_db_lookup_date))
90 logging.debug('Extracted job IDs in %.2f seconds', time.time() - start_time)
91 job_ids = []
92 for result_row in cursor:
93 job_id = str(result_row[0])
94 if job_id not in completed_ids:
95 job_ids.append(job_id)
96 return job_ids
97
98
Dennis Jeffrey8a305382013-02-28 09:08:57 -080099def write_perf_info_to_disk(job_id, result_dict, test_dir, output_dir):
Dennis Jeffreyb95ba5a2012-11-12 17:55:18 -0800100 """Writes extracted perf data for the given job ID to disk.
101
102 Also writes the job ID to disk to mark it as having been processed. Note
103 that the written files are not protected against simultaneous access by
104 multiple invocations of this script.
105
106 @param job_id: The string job ID.
107 @param result_dict: A dictionary of associated perf info to write to disk.
108 @param test_dir: The string directory name in which to write perf data.
Dennis Jeffrey8a305382013-02-28 09:08:57 -0800109 @param output_dir: The output directory in which results are being written.
110
Dennis Jeffreyb95ba5a2012-11-12 17:55:18 -0800111 """
112 result_out = [job_id, result_dict['job_name'], result_dict['platform']]
113 perf_items = []
114 for perf_key in result_dict['perf_keys']:
115 for perf_val in result_dict['perf_keys'][perf_key]:
116 perf_items.append((perf_key, perf_val))
117 result_out.append(perf_items)
118 file_name = os.path.join(test_dir, result_dict['platform'] + '.txt')
119 with open(file_name, 'a') as fp:
120 fp.write(simplejson.dumps(result_out) + '\n')
121
Dennis Jeffrey8a305382013-02-28 09:08:57 -0800122 with open(os.path.join(output_dir, _COMPLETED_ID_FILE_NAME), 'a') as fp:
Dennis Jeffreyb95ba5a2012-11-12 17:55:18 -0800123 fp.write(job_id + '\n')
124
125
Dennis Jeffrey8a305382013-02-28 09:08:57 -0800126def extract_perf_for_job_id(cursor, job_id, unexpected_job_names, test_dir,
127 output_dir):
Dennis Jeffreyb95ba5a2012-11-12 17:55:18 -0800128 """Extracts perf data for a given job, then writes to local text files.
129
130 @param cursor: A MySQLdb.cursor object used for interacting with a database.
131 @param job_id: The string job ID to process.
132 @param unexpected_job_names: A set of job names encountered so far that are
133 not associated with a known platform type.
134 @param test_dir: The string directory name in which to write perf data.
Dennis Jeffrey8a305382013-02-28 09:08:57 -0800135 @param output_dir: The output directory in which results are being written.
Dennis Jeffreyb95ba5a2012-11-12 17:55:18 -0800136
137 @return True, if data for the specified job ID is written to disk, or
138 False if not (will be False if the job ID is not associated with a known
139 platform type).
Dennis Jeffrey8a305382013-02-28 09:08:57 -0800140
Dennis Jeffreyb95ba5a2012-11-12 17:55:18 -0800141 """
142 query = ('SELECT job_name,iteration_key,iteration_value '
143 'FROM tko_perf_view_2 INNER JOIN tko_jobs USING (job_idx) '
144 'WHERE afe_job_id = %s')
145 cursor.execute(query, job_id)
146
147 result = {}
148 for job_name, key, val in cursor:
149 # The job_name string contains the platform name. The platform name is
150 # always followed by either "-rX", where X is the milestone number
151 # (this is from legacy data in the database), or else it is followed
152 # by "-release" (for more recent data in the database). We do not
153 # consider jobs in which the platform name is followed by anything
154 # else (in particular, "-paladin" runs).
155 #
156 # TODO(dennisjeffrey): Simplify the below code once the following bug
157 # is addressed to standardize the platform names: crosbug.com/38521.
158 match = re.search('(\w+)-r', job_name)
Dennis Jeffreybbe98a22013-02-13 09:59:38 -0800159 if not match:
160 unexpected_job_names.add(job_name)
161 continue
Dennis Jeffreyb95ba5a2012-11-12 17:55:18 -0800162 # Only process jobs for known platforms.
163 platform = match.group(1) if match.group(1) in _PLATFORMS else None
164 if platform:
165 result['job_name'] = job_name
166 result['platform'] = platform
167 result.setdefault('perf_keys', {})
168 if key and val:
169 result['perf_keys'].setdefault(key, [])
170 result['perf_keys'][key].append(val)
171 else:
172 unexpected_job_names.add(job_name)
173
174 if 'platform' not in result:
175 return False
176
Dennis Jeffrey8a305382013-02-28 09:08:57 -0800177 write_perf_info_to_disk(job_id, result, test_dir, output_dir)
Dennis Jeffreyb95ba5a2012-11-12 17:55:18 -0800178 return True
179
180
Dennis Jeffrey8a305382013-02-28 09:08:57 -0800181def query_database(cursor, test_name, completed_ids, oldest_db_lookup_date,
182 output_dir):
Dennis Jeffreyb95ba5a2012-11-12 17:55:18 -0800183 """Queries database for perf values and stores them into local text files.
184
185 This function performs the work only for the specified test case.
186
187 @param cursor: A MySQLdb.cursor object used for interacting with a database.
188 @param test_name: The string name of a test case to process.
189 @param completed_ids: A set of job IDs that have already been previously
190 extracted from the database.
191 @param oldest_db_lookup_date: The oldest date (represented as a string) for
192 which we want to consider perf values in the database.
Dennis Jeffrey8a305382013-02-28 09:08:57 -0800193 @param output_dir: The output directory in which results are being written.
Dennis Jeffreyb95ba5a2012-11-12 17:55:18 -0800194
195 @return The number of new job IDs that have been extracted/processed.
Dennis Jeffrey8a305382013-02-28 09:08:57 -0800196
Dennis Jeffreyb95ba5a2012-11-12 17:55:18 -0800197 """
Dennis Jeffrey8a305382013-02-28 09:08:57 -0800198 test_dir = os.path.join(output_dir, test_name)
Dennis Jeffreyb95ba5a2012-11-12 17:55:18 -0800199 if not os.path.isdir(test_dir):
200 os.makedirs(test_dir)
201
202 # Identify the job IDs that need to be processed.
203 job_ids = get_job_ids(cursor, test_name, oldest_db_lookup_date,
204 completed_ids)
205
206 # For each job ID, extract the perf values we need.
207 unexpected_job_names = set()
208 num_newly_added = 0
209 for i, job_id in enumerate(job_ids):
210 logging.debug('Processing job %d of %d', i + 1, len(job_ids))
211
212 if extract_perf_for_job_id(cursor, job_id, unexpected_job_names,
Dennis Jeffrey8a305382013-02-28 09:08:57 -0800213 test_dir, output_dir):
Dennis Jeffreyb95ba5a2012-11-12 17:55:18 -0800214 completed_ids.add(job_id)
215 num_newly_added += 1
216
217 if unexpected_job_names:
218 logging.debug('Job names skipped due to unexpected platform: %s',
219 list(unexpected_job_names))
220
221 return num_newly_added
222
223
Dennis Jeffrey8a305382013-02-28 09:08:57 -0800224def extract_new_perf_data(cursor, output_dir, options):
Dennis Jeffreyb95ba5a2012-11-12 17:55:18 -0800225 """Extracts new perf data from database and writes data to local text files.
226
227 @param cursor: A MySQLdb.cursor object used for interacting with a database.
Dennis Jeffrey8a305382013-02-28 09:08:57 -0800228 @param output_dir: The output directory in which results are being written.
Dennis Jeffreyb95ba5a2012-11-12 17:55:18 -0800229 @param options: An optparse.OptionParser options object.
230
231 @return The number of new job IDs that have been extracted/processed.
Dennis Jeffrey8a305382013-02-28 09:08:57 -0800232
Dennis Jeffreyb95ba5a2012-11-12 17:55:18 -0800233 """
234 charts = {}
235 with open(_CHART_CONFIG_FILE, 'r') as fp:
236 charts = simplejson.loads(fp.read())
237
238 # Compute the oldest date for the perf values that we want to consider.
239 oldest_db_lookup_date = (
240 datetime.date.today() -
241 datetime.timedelta(days=options.num_days_back)).strftime('%Y-%m-%d')
242
243 logging.debug('Extracting job IDs from %s onwards.',
244 oldest_db_lookup_date)
245
246 # Get unique test names.
Dennis Jeffreybbe98a22013-02-13 09:59:38 -0800247 test_names = set()
248 for c in charts:
249 test_names.add(c['test_name'])
250 if 'old_test_names' in c:
251 test_names |= set(c['old_test_names'])
Dennis Jeffreyb95ba5a2012-11-12 17:55:18 -0800252
253 # Get list of already-completed job IDs so we don't re-fetch their data.
254 completed_ids = set()
Dennis Jeffrey8a305382013-02-28 09:08:57 -0800255 completed_id_file = os.path.join(output_dir, _COMPLETED_ID_FILE_NAME)
256 if os.path.isfile(completed_id_file):
257 with open(completed_id_file, 'r') as fp:
Dennis Jeffreyb95ba5a2012-11-12 17:55:18 -0800258 job_ids = map(lambda x: x.strip(), fp.readlines())
259 for job_id in job_ids:
260 completed_ids.add(job_id)
261
262 num_newly_added = 0
263 for i, test_name in enumerate(test_names):
264 logging.info('Extracting info for test %d of %d: %s ', i + 1,
265 len(test_names), test_name)
266
267 num_newly_added += query_database(cursor, test_name, completed_ids,
Dennis Jeffrey8a305382013-02-28 09:08:57 -0800268 oldest_db_lookup_date, output_dir)
Dennis Jeffreyb95ba5a2012-11-12 17:55:18 -0800269
270 return num_newly_added
271
272
Dennis Jeffrey8a305382013-02-28 09:08:57 -0800273def cleanup(output_dir):
274 """Cleans up when this script is done.
275
276 @param output_dir: The output directory in which results are being written.
277
278 """
279 curr_pid_file = os.path.join(output_dir, _CURR_PID_FILE_NAME)
280 if os.path.isfile(curr_pid_file):
281 os.remove(curr_pid_file)
Dennis Jeffreyb95ba5a2012-11-12 17:55:18 -0800282
283
284def main():
285 """Main function."""
286 parser = optparse.OptionParser()
287 parser.add_option('-n', '--num-days-back', metavar='NUM_DAYS', type='int',
288 default=_NUM_DAYS_BACK,
289 help='Consider only the perf test results that were '
290 'computed within this many days ago (if this script '
291 'is invoked daily, no need to consider history from '
292 'many days back). Defaults to %default days back.')
Dennis Jeffrey8a305382013-02-28 09:08:57 -0800293 parser.add_option('-o', '--output-dir', metavar='DIR', type='string',
294 default=_DEST_DATA_DIR,
295 help='Absolute path to the output directory in which to '
296 'store the raw perf data extracted from the '
297 'database. Will be written into a subfolder named '
298 '"data". Defaults to "%default".')
Dennis Jeffreyb95ba5a2012-11-12 17:55:18 -0800299 parser.add_option('-c', '--cloud-sql', action='store_true', default=False,
300 help='Connect to the chromeos-lab CloudSQL database, '
301 'rather than the original MySQL autotest database.')
302 parser.add_option('-v', '--verbose', action='store_true', default=False,
303 help='Use verbose logging.')
304 options, _ = parser.parse_args()
305
306 log_level = logging.DEBUG if options.verbose else logging.INFO
307 logging.basicConfig(format='%(asctime)s %(levelname)s: %(message)s',
308 level=log_level)
309
310 cursor = None
311 if options.cloud_sql:
312 # CloudSQL.
313 logging.debug('Connecting to CloudSQL...')
314 if rdbms_googleapi is None:
315 logging.error('CloudSQL requested, but cannot locate CloudSQL '
316 'dependencies. Have you set up CloudSQL on this '
317 'machine?')
318 sys.exit(1)
319 db = rdbms_googleapi.connect(None,
320 instance='chromeos-bot:chromeos-lab')
321 cursor = db.cursor()
322 cursor.execute('USE chromeos_autotest_db')
323 else:
324 # Autotest MySQL database.
325 logging.debug('Connecting to Autotest MySQL database...')
326 if MySQLdb is None:
327 logging.error('MySQL requested, but cannot locate MySQL '
328 'dependencies. Have you set up MySQL on this '
329 'machine?')
330 sys.exit(1)
331 db = MySQLdb.connect(**_MYSQL_READONLY_LOGIN_CREDENTIALS)
332 cursor = db.cursor()
333
334 logging.debug('Database connection complete.')
335
Dennis Jeffrey8a305382013-02-28 09:08:57 -0800336 output_dir = os.path.join(options.output_dir, 'data')
337 if not os.path.isdir(output_dir):
338 os.makedirs(output_dir)
Dennis Jeffreyb95ba5a2012-11-12 17:55:18 -0800339
Dennis Jeffrey8a305382013-02-28 09:08:57 -0800340 common.die_if_already_running(
341 os.path.join(output_dir, _CURR_PID_FILE_NAME), logging)
342 num_newly_added = extract_new_perf_data(cursor, output_dir, options)
343 cleanup(output_dir)
Dennis Jeffreyb95ba5a2012-11-12 17:55:18 -0800344 logging.info('Done! Added info for %d new job IDs', num_newly_added)
345
346
347if __name__ == '__main__':
348 main()