#!/usr/bin/env python
# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
5
6"""Extracts perf keys from autotest database and writes to local data files.
7
8This script keeps track of the job IDs for which perf keys have already been
9extracted on previous runs. The script only processes those job IDs whose perf
10values haven't yet been previously extracted.
11
12Sample usage:
13 python extract_perf.py -v
14
15Run with -h to see the full set of command-line options.
16"""
17
18import datetime
19import logging
20import optparse
21import os
22import re
23import simplejson
24import sys
25import time
26
27import common
28from autotest_lib.client.common_lib import global_config
29from autotest_lib.server import frontend
30
31try:
32 from google.storage.speckle.python.api import rdbms_googleapi
33except ImportError:
34 # Download the AppEngine SDK if desired from here:
35 # https://developers.google.com/appengine/downloads
36 rdbms_googleapi = None
37
38try:
39 import MySQLdb
40except ImportError:
41 MySQLdb = None
42
# Handles to the autotest global configuration, used to read the readonly
# database credentials below.
_GLOBAL_CONF = global_config.global_config
_CONF_SECTION = 'AUTOTEST_WEB'

# Readonly MySQL credentials, pulled from the AUTOTEST_WEB config section.
_MYSQL_READONLY_LOGIN_CREDENTIALS = {
    'host': _GLOBAL_CONF.get_config_value(_CONF_SECTION, 'readonly_host'),
    'user': _GLOBAL_CONF.get_config_value(_CONF_SECTION, 'readonly_user'),
    'passwd': _GLOBAL_CONF.get_config_value(_CONF_SECTION, 'readonly_password'),
    'db': _GLOBAL_CONF.get_config_value(_CONF_SECTION, 'database'),
}

# Paths to files.
_ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
_CHART_CONFIG_FILE = os.path.join(_ROOT_DIR, 'croschart_defaults.json')
_DEST_DATA_DIR = os.path.join(_ROOT_DIR, 'data')
_COMPLETED_ID_FILE = os.path.join(_DEST_DATA_DIR, 'job_id_complete.txt')
_CURR_PID_FILE = os.path.join(_DEST_DATA_DIR, __file__ + '.curr_pid.txt')

# Values that can be configured through options.
_NUM_DAYS_BACK = 7  # Ignore perf test runs in database that finished more than
                    # this many days ago.

# Other values that can only be configured here in the code.
# NOTE: constructing the AFE frontend at import time issues an RPC to fetch
# the known platform labels.
_AFE = frontend.AFE()
_PLATFORMS = [label.name for label in _AFE.get_labels(platform=True)]
# Legacy board rename: jobs for the "snow" platform appear under "daisy" in
# job names, so track "daisy" instead.
if 'snow' in _PLATFORMS:
    _PLATFORMS.remove('snow')
    _PLATFORMS.append('daisy')
70
71
def get_job_ids(cursor, test_name, oldest_db_lookup_date, completed_ids):
    """Gets all job IDs for the given test name that haven't yet been processed.

    @param cursor: see query_database().
    @param test_name: see query_database().
    @param oldest_db_lookup_date: see query_database().
    @param completed_ids: see query_database().

    @return A list of string job IDs from the database that should be processed.
    """
    # Skip jobs whose status word is "RUNNING": their perf values may still
    # be incomplete in the database.
    query = ('SELECT DISTINCT afe_job_id '
             'FROM tko_perf_view_2 INNER JOIN tko_jobs USING (job_idx) '
             'INNER JOIN tko_status USING (status_idx) WHERE '
             'test_name = %s AND test_finished_time >= %s AND '
             'word != "RUNNING"')
    start_time = time.time()
    cursor.execute(query, (test_name, oldest_db_lookup_date))
    logging.debug('Extracted job IDs in %.2f seconds', time.time() - start_time)
    # Keep only those IDs not already processed on a previous run.
    return [str(result_row[0]) for result_row in cursor
            if str(result_row[0]) not in completed_ids]
96
97
def write_perf_info_to_disk(job_id, result_dict, test_dir):
    """Writes extracted perf data for the given job ID to disk.

    Also writes the job ID to disk to mark it as having been processed. Note
    that the written files are not protected against simultaneous access by
    multiple invocations of this script.

    @param job_id: The string job ID.
    @param result_dict: A dictionary of associated perf info to write to disk.
    @param test_dir: The string directory name in which to write perf data.
    """
    # Flatten the {key: [values]} mapping into a list of (key, value) pairs.
    perf_keys = result_dict['perf_keys']
    perf_pairs = [(perf_key, perf_val)
                  for perf_key in perf_keys
                  for perf_val in perf_keys[perf_key]]
    record = [job_id, result_dict['job_name'], result_dict['platform'],
              perf_pairs]
    out_path = os.path.join(test_dir, result_dict['platform'] + '.txt')
    with open(out_path, 'a') as out_file:
        out_file.write(simplejson.dumps(record) + '\n')

    # Mark this job ID as processed so future runs skip it.
    with open(_COMPLETED_ID_FILE, 'a') as id_file:
        id_file.write(job_id + '\n')
121
122
def extract_perf_for_job_id(cursor, job_id, unexpected_job_names, test_dir):
    """Extracts perf data for a given job, then writes to local text files.

    @param cursor: A MySQLdb.cursor object used for interacting with a database.
    @param job_id: The string job ID to process.
    @param unexpected_job_names: A set of job names encountered so far that are
        not associated with a known platform type.
    @param test_dir: The string directory name in which to write perf data.

    @return True, if data for the specified job ID is written to disk, or
        False if not (will be False if the job ID is not associated with a known
        platform type).
    """
    query = ('SELECT job_name,iteration_key,iteration_value '
             'FROM tko_perf_view_2 INNER JOIN tko_jobs USING (job_idx) '
             'WHERE afe_job_id = %s')
    # Pass query parameters as a sequence: passing a bare string relies on
    # deprecated driver behavior (DB-API expects a sequence/mapping).
    cursor.execute(query, (job_id,))

    result = {}
    for job_name, key, val in cursor:
        # The job_name string contains the platform name. The platform name is
        # always followed by either "-rX", where X is the milestone number
        # (this is from legacy data in the database), or else it is followed
        # by "-release" (for more recent data in the database). We do not
        # consider jobs in which the platform name is followed by anything
        # else (in particular, "-paladin" runs).
        #
        # TODO(dennisjeffrey): Simplify the below code once the following bug
        # is addressed to standardize the platform names: crosbug.com/38521.
        match = re.search(r'(\w+)-r', job_name)
        if not match:
            unexpected_job_names.add(job_name)
            continue
        # Only process jobs for known platforms.
        platform = match.group(1) if match.group(1) in _PLATFORMS else None
        if platform:
            result['job_name'] = job_name
            result['platform'] = platform
            result.setdefault('perf_keys', {})
            # Skip rows with empty/NULL perf keys or values.
            if key and val:
                result['perf_keys'].setdefault(key, [])
                result['perf_keys'][key].append(val)
        else:
            unexpected_job_names.add(job_name)

    # No row matched a known platform: nothing to write.
    if 'platform' not in result:
        return False

    write_perf_info_to_disk(job_id, result, test_dir)
    return True
173
174
def query_database(cursor, test_name, completed_ids, oldest_db_lookup_date):
    """Queries database for perf values and stores them into local text files.

    This function performs the work only for the specified test case.

    @param cursor: A MySQLdb.cursor object used for interacting with a database.
    @param test_name: The string name of a test case to process.
    @param completed_ids: A set of job IDs that have already been previously
        extracted from the database.
    @param oldest_db_lookup_date: The oldest date (represented as a string) for
        which we want to consider perf values in the database.

    @return The number of new job IDs that have been extracted/processed.
    """
    test_dir = os.path.join(_DEST_DATA_DIR, test_name)
    if not os.path.isdir(test_dir):
        os.makedirs(test_dir)

    # Identify the job IDs that need to be processed.
    pending_ids = get_job_ids(cursor, test_name, oldest_db_lookup_date,
                              completed_ids)

    # For each job ID, extract the perf values we need.
    unexpected_job_names = set()
    num_newly_added = 0
    total = len(pending_ids)
    for index, job_id in enumerate(pending_ids, start=1):
        logging.debug('Processing job %d of %d', index, total)
        extracted = extract_perf_for_job_id(cursor, job_id,
                                            unexpected_job_names, test_dir)
        if extracted:
            completed_ids.add(job_id)
            num_newly_added += 1

    if unexpected_job_names:
        logging.debug('Job names skipped due to unexpected platform: %s',
                      list(unexpected_job_names))

    return num_newly_added
213
214
def extract_new_perf_data(cursor, options):
    """Extracts new perf data from database and writes data to local text files.

    @param cursor: A MySQLdb.cursor object used for interacting with a database.
    @param options: An optparse.OptionParser options object.

    @return The number of new job IDs that have been extracted/processed.
    """
    with open(_CHART_CONFIG_FILE, 'r') as fp:
        charts = simplejson.loads(fp.read())

    # Compute the oldest date for the perf values that we want to consider.
    oldest_db_lookup_date = (
        datetime.date.today() -
        datetime.timedelta(days=options.num_days_back)).strftime('%Y-%m-%d')

    logging.debug('Extracting job IDs from %s onwards.',
                  oldest_db_lookup_date)

    # Get unique test names, including any legacy names a chart may carry.
    test_names = set()
    for chart in charts:
        test_names.add(chart['test_name'])
        test_names |= set(chart.get('old_test_names', []))

    # Get list of already-completed job IDs so we don't re-fetch their data.
    completed_ids = set()
    if os.path.isfile(_COMPLETED_ID_FILE):
        with open(_COMPLETED_ID_FILE, 'r') as fp:
            completed_ids = set(line.strip() for line in fp)

    num_newly_added = 0
    for i, test_name in enumerate(test_names):
        logging.info('Extracting info for test %d of %d: %s ', i + 1,
                     len(test_names), test_name)

        num_newly_added += query_database(cursor, test_name, completed_ids,
                                          oldest_db_lookup_date)

    return num_newly_added
259
260
def cleanup():
    """Cleans up when this script is done."""
    # Remove the PID lockfile, if present, so later invocations can run.
    pid_file_present = os.path.isfile(_CURR_PID_FILE)
    if pid_file_present:
        os.remove(_CURR_PID_FILE)
265
266
def _get_cursor(options):
    """Connects to the requested database and returns a DB-API cursor.

    Exits the process with status 1 if the driver needed for the requested
    database is not importable on this machine.

    @param options: An optparse.OptionParser options object.

    @return A cursor connected to either CloudSQL or the autotest MySQL db.
    """
    if options.cloud_sql:
        # CloudSQL.
        logging.debug('Connecting to CloudSQL...')
        if rdbms_googleapi is None:
            logging.error('CloudSQL requested, but cannot locate CloudSQL '
                          'dependencies. Have you set up CloudSQL on this '
                          'machine?')
            sys.exit(1)
        db = rdbms_googleapi.connect(None,
                                     instance='chromeos-bot:chromeos-lab')
        cursor = db.cursor()
        cursor.execute('USE chromeos_autotest_db')
        return cursor

    # Autotest MySQL database.
    logging.debug('Connecting to Autotest MySQL database...')
    if MySQLdb is None:
        logging.error('MySQL requested, but cannot locate MySQL '
                      'dependencies. Have you set up MySQL on this '
                      'machine?')
        sys.exit(1)
    db = MySQLdb.connect(**_MYSQL_READONLY_LOGIN_CREDENTIALS)
    return db.cursor()


def main():
    """Main function."""
    parser = optparse.OptionParser()
    parser.add_option('-n', '--num-days-back', metavar='NUM_DAYS', type='int',
                      default=_NUM_DAYS_BACK,
                      help='Consider only the perf test results that were '
                           'computed within this many days ago (if this script '
                           'is invoked daily, no need to consider history from '
                           'many days back). Defaults to %default days back.')
    parser.add_option('-c', '--cloud-sql', action='store_true', default=False,
                      help='Connect to the chromeos-lab CloudSQL database, '
                           'rather than the original MySQL autotest database.')
    parser.add_option('-v', '--verbose', action='store_true', default=False,
                      help='Use verbose logging.')
    options, _ = parser.parse_args()

    log_level = logging.DEBUG if options.verbose else logging.INFO
    logging.basicConfig(format='%(asctime)s %(levelname)s: %(message)s',
                        level=log_level)

    cursor = _get_cursor(options)
    logging.debug('Database connection complete.')

    if not os.path.isdir(_DEST_DATA_DIR):
        os.makedirs(_DEST_DATA_DIR)

    # Refuse to run concurrently with another instance of this script.
    common.die_if_already_running(_CURR_PID_FILE, logging)
    num_newly_added = extract_new_perf_data(cursor, options)
    cleanup()
    logging.info('Done! Added info for %d new job IDs', num_newly_added)
320
321
# Script entry point.
if __name__ == '__main__':
    main()