#!/usr/bin/env python
# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Extracts perf keys from autotest database and writes to local data files.

This script keeps track of the job IDs for which perf keys have already been
extracted on previous runs. The script only processes those job IDs whose perf
values haven't yet been previously extracted.

Sample usage:
    python extract_perf.py -v

Run with -h to see the full set of command-line options.
"""
17
18import datetime
19import logging
20import optparse
21import os
22import re
23import simplejson
24import sys
25import time
26
27import common
28from autotest_lib.client.common_lib import global_config
29from autotest_lib.server import frontend
30
31try:
32 from google.storage.speckle.python.api import rdbms_googleapi
33except ImportError:
34 # Download the AppEngine SDK if desired from here:
35 # https://developers.google.com/appengine/downloads
36 rdbms_googleapi = None
37
38try:
39 import MySQLdb
40except ImportError:
41 MySQLdb = None
42
43_GLOBAL_CONF = global_config.global_config
44_CONF_SECTION = 'AUTOTEST_WEB'
45
46_MYSQL_READONLY_LOGIN_CREDENTIALS = {
47 'host': _GLOBAL_CONF.get_config_value(_CONF_SECTION, 'readonly_host'),
48 'user': _GLOBAL_CONF.get_config_value(_CONF_SECTION, 'readonly_user'),
49 'passwd': _GLOBAL_CONF.get_config_value(_CONF_SECTION, 'readonly_password'),
50 'db': _GLOBAL_CONF.get_config_value(_CONF_SECTION, 'database'),
51}
52
53# Paths to files.
54_ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
55_CHART_CONFIG_FILE = os.path.join(_ROOT_DIR, 'croschart_defaults.json')
56_DEST_DATA_DIR = os.path.join(_ROOT_DIR, 'data')
57_COMPLETED_ID_FILE = os.path.join(_DEST_DATA_DIR, 'job_id_complete.txt')
58_CURR_PID_FILE = os.path.join(_DEST_DATA_DIR, __file__ + '.curr_pid.txt')
59
60# Values that can be configured through options.
61_NUM_DAYS_BACK = 7 # Ignore perf test runs in database that finished more than
62 # this many days ago.
63
64# Other values that can only be configured here in the code.
65_AFE = frontend.AFE()
66_PLATFORMS = map(lambda x: x.name, _AFE.get_labels(platform=True))
67if 'snow' in _PLATFORMS:
68 _PLATFORMS.remove('snow')
69 _PLATFORMS.append('daisy')
70
71
def get_job_ids(cursor, test_name, oldest_db_lookup_date, completed_ids):
    """Gets all job IDs for the given test name that haven't yet been processed.

    @param cursor: see query_database().
    @param test_name: see query_database().
    @param oldest_db_lookup_date: see query_database().
    @param completed_ids: see query_database().

    @return A list of string job IDs from the database that should be processed.
    """
    query = ('SELECT DISTINCT afe_job_id '
             'FROM tko_perf_view_2 INNER JOIN tko_jobs USING (job_idx) '
             'INNER JOIN tko_status USING (status_idx) WHERE '
             'test_name = %s AND test_finished_time >= %s AND '
             'word != "RUNNING"')
    query_start = time.time()
    cursor.execute(query, (test_name, oldest_db_lookup_date))
    logging.debug('Extracted job IDs in %.2f seconds',
                  time.time() - query_start)
    # Keep only the job IDs that a previous run has not already handled.
    return [job_id for job_id in (str(row[0]) for row in cursor)
            if job_id not in completed_ids]
96
97
def write_perf_info_to_disk(job_id, result_dict, test_dir):
    """Writes extracted perf data for the given job ID to disk.

    Also writes the job ID to disk to mark it as having been processed. Note
    that the written files are not protected against simultaneous access by
    multiple invocations of this script.

    @param job_id: The string job ID.
    @param result_dict: A dictionary of associated perf info to write to disk.
    @param test_dir: The string directory name in which to write perf data.
    """
    # Flatten the {perf_key: [values]} mapping into (key, value) pairs.
    perf_items = [(perf_key, perf_val)
                  for perf_key in result_dict['perf_keys']
                  for perf_val in result_dict['perf_keys'][perf_key]]
    record = [job_id, result_dict['job_name'], result_dict['platform'],
              perf_items]
    # One JSON record per line, appended to a per-platform data file.
    out_file = os.path.join(test_dir, result_dict['platform'] + '.txt')
    with open(out_file, 'a') as fp:
        fp.write(simplejson.dumps(record) + '\n')

    # Mark this job ID as processed so future runs skip it.
    with open(_COMPLETED_ID_FILE, 'a') as fp:
        fp.write(job_id + '\n')
121
122
def extract_perf_for_job_id(cursor, job_id, unexpected_job_names, test_dir):
    """Extracts perf data for a given job, then writes to local text files.

    @param cursor: A MySQLdb.cursor object used for interacting with a database.
    @param job_id: The string job ID to process.
    @param unexpected_job_names: A set of job names encountered so far that are
        not associated with a known platform type.
    @param test_dir: The string directory name in which to write perf data.

    @return True, if data for the specified job ID is written to disk, or
        False if not (will be False if the job ID is not associated with a known
        platform type).
    """
    query = ('SELECT job_name,iteration_key,iteration_value '
             'FROM tko_perf_view_2 INNER JOIN tko_jobs USING (job_idx) '
             'WHERE afe_job_id = %s')
    # DB-API 2.0 expects the query parameters as a sequence, not a bare value.
    cursor.execute(query, (job_id,))

    result = {}
    for job_name, key, val in cursor:
        # The job_name string contains the platform name. The platform name is
        # always followed by either "-rX", where X is the milestone number
        # (this is from legacy data in the database), or else it is followed
        # by "-release" (for more recent data in the database). We do not
        # consider jobs in which the platform name is followed by anything
        # else (in particular, "-paladin" runs).
        #
        # TODO(dennisjeffrey): Simplify the below code once the following bug
        # is addressed to standardize the platform names: crosbug.com/38521.
        match = re.search(r'(\w+)-r', job_name)
        # Only process jobs for known platforms. Guard against job names that
        # don't match the pattern at all: previously match.group(1) raised an
        # AttributeError when re.search() returned None.
        platform = None
        if match and match.group(1) in _PLATFORMS:
            platform = match.group(1)
        if platform:
            result['job_name'] = job_name
            result['platform'] = platform
            perf_keys = result.setdefault('perf_keys', {})
            # Skip rows with NULL/empty keys or values.
            if key and val:
                perf_keys.setdefault(key, []).append(val)
        else:
            unexpected_job_names.add(job_name)

    if 'platform' not in result:
        # No row matched a known platform; nothing to write for this job.
        return False

    write_perf_info_to_disk(job_id, result, test_dir)
    return True
170
171
def query_database(cursor, test_name, completed_ids, oldest_db_lookup_date):
    """Queries database for perf values and stores them into local text files.

    This function performs the work only for the specified test case.

    @param cursor: A MySQLdb.cursor object used for interacting with a database.
    @param test_name: The string name of a test case to process.
    @param completed_ids: A set of job IDs that have already been previously
        extracted from the database.
    @param oldest_db_lookup_date: The oldest date (represented as a string) for
        which we want to consider perf values in the database.

    @return The number of new job IDs that have been extracted/processed.
    """
    # Each test gets its own subdirectory of perf data files.
    test_dir = os.path.join(_DEST_DATA_DIR, test_name)
    if not os.path.isdir(test_dir):
        os.makedirs(test_dir)

    # Identify the job IDs that still need to be processed.
    pending_ids = get_job_ids(cursor, test_name, oldest_db_lookup_date,
                              completed_ids)

    unexpected_job_names = set()
    num_newly_added = 0
    total = len(pending_ids)
    for index, job_id in enumerate(pending_ids, start=1):
        logging.debug('Processing job %d of %d', index, total)
        extracted = extract_perf_for_job_id(cursor, job_id,
                                            unexpected_job_names, test_dir)
        if extracted:
            completed_ids.add(job_id)
            num_newly_added += 1

    if unexpected_job_names:
        logging.debug('Job names skipped due to unexpected platform: %s',
                      list(unexpected_job_names))

    return num_newly_added
210
211
def extract_new_perf_data(cursor, options):
    """Extracts new perf data from database and writes data to local text files.

    @param cursor: A MySQLdb.cursor object used for interacting with a database.
    @param options: An optparse.OptionParser options object.

    @return The number of new job IDs that have been extracted/processed.
    """
    with open(_CHART_CONFIG_FILE, 'r') as fp:
        charts = simplejson.loads(fp.read())

    # Compute the oldest date for the perf values that we want to consider.
    cutoff = (datetime.date.today() -
              datetime.timedelta(days=options.num_days_back))
    oldest_db_lookup_date = cutoff.strftime('%Y-%m-%d')

    logging.debug('Extracting job IDs from %s onwards.',
                  oldest_db_lookup_date)

    # Unique test names across all configured charts.
    test_names = set(chart['test_name'] for chart in charts)

    # Job IDs already handled by previous runs, so we don't re-fetch them.
    completed_ids = set()
    if os.path.isfile(_COMPLETED_ID_FILE):
        with open(_COMPLETED_ID_FILE, 'r') as fp:
            completed_ids = set(line.strip() for line in fp.readlines())

    num_newly_added = 0
    for i, test_name in enumerate(test_names):
        logging.info('Extracting info for test %d of %d: %s ', i + 1,
                     len(test_names), test_name)
        num_newly_added += query_database(cursor, test_name, completed_ids,
                                          oldest_db_lookup_date)

    return num_newly_added
252
253
def cleanup():
    """Cleans up when this script is done."""
    # Remove the PID file so future invocations know this run has finished.
    if not os.path.isfile(_CURR_PID_FILE):
        return
    os.remove(_CURR_PID_FILE)
258
259
def _parse_options():
    """Builds the command-line option parser and parses sys.argv.

    @return The parsed optparse options object.
    """
    parser = optparse.OptionParser()
    parser.add_option('-n', '--num-days-back', metavar='NUM_DAYS', type='int',
                      default=_NUM_DAYS_BACK,
                      help='Consider only the perf test results that were '
                           'computed within this many days ago (if this script '
                           'is invoked daily, no need to consider history from '
                           'many days back). Defaults to %default days back.')
    parser.add_option('-c', '--cloud-sql', action='store_true', default=False,
                      help='Connect to the chromeos-lab CloudSQL database, '
                           'rather than the original MySQL autotest database.')
    parser.add_option('-v', '--verbose', action='store_true', default=False,
                      help='Use verbose logging.')
    return parser.parse_args()[0]


def _connect_to_database(use_cloud_sql):
    """Opens a database connection and returns a cursor for it.

    Exits the process with status 1 if the requested database's client
    dependencies are not available on this machine.

    @param use_cloud_sql: True to connect to CloudSQL, False for the
        autotest MySQL database.

    @return A DB-API cursor connected to the requested database.
    """
    if use_cloud_sql:
        logging.debug('Connecting to CloudSQL...')
        if rdbms_googleapi is None:
            logging.error('CloudSQL requested, but cannot locate CloudSQL '
                          'dependencies. Have you set up CloudSQL on this '
                          'machine?')
            sys.exit(1)
        db = rdbms_googleapi.connect(None,
                                     instance='chromeos-bot:chromeos-lab')
        cursor = db.cursor()
        cursor.execute('USE chromeos_autotest_db')
        return cursor
    logging.debug('Connecting to Autotest MySQL database...')
    if MySQLdb is None:
        logging.error('MySQL requested, but cannot locate MySQL '
                      'dependencies. Have you set up MySQL on this '
                      'machine?')
        sys.exit(1)
    db = MySQLdb.connect(**_MYSQL_READONLY_LOGIN_CREDENTIALS)
    return db.cursor()


def main():
    """Main function."""
    options = _parse_options()

    logging.basicConfig(
        format='%(asctime)s %(levelname)s: %(message)s',
        level=logging.DEBUG if options.verbose else logging.INFO)

    cursor = _connect_to_database(options.cloud_sql)
    logging.debug('Database connection complete.')

    if not os.path.isdir(_DEST_DATA_DIR):
        os.makedirs(_DEST_DATA_DIR)

    # Ensure only one instance of this script runs at a time.
    common.die_if_already_running(_CURR_PID_FILE, logging)
    num_newly_added = extract_new_perf_data(cursor, options)
    cleanup()
    logging.info('Done! Added info for %d new job IDs', num_newly_added)
313
314
# Script entry point.
if __name__ == '__main__':
    main()