#!/usr/bin/env python
# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Extracts perf keys from the autotest database into local data files.

This script keeps track of the job IDs whose perf keys have already been
extracted on previous runs, and only processes job IDs whose perf values
haven't been extracted yet.

Sample usage:
    python extract_perf.py -v

Run with -h to see the full set of command-line options.
"""

import datetime
import logging
import optparse
import os
import re
import simplejson
import sys
import time

import common
from autotest_lib.client.common_lib import global_config
from autotest_lib.server import frontend

try:
    from google.storage.speckle.python.api import rdbms_googleapi
except ImportError:
    # If needed, the App Engine SDK can be downloaded from:
    # https://developers.google.com/appengine/downloads
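    # Leaving the module as None lets the MySQL-only path keep working;
    # main() reports an error only if --cloud-sql is actually requested.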
    rdbms_googleapi = None

try:
    import MySQLdb
except ImportError:
    MySQLdb = None

_GLOBAL_CONF = global_config.global_config
_CONF_SECTION = 'AUTOTEST_WEB'

_MYSQL_READONLY_LOGIN_CREDENTIALS = {
    'host': _GLOBAL_CONF.get_config_value(_CONF_SECTION, 'readonly_host'),
    'user': _GLOBAL_CONF.get_config_value(_CONF_SECTION, 'readonly_user'),
    'passwd': _GLOBAL_CONF.get_config_value(_CONF_SECTION, 'readonly_password'),
    'db': _GLOBAL_CONF.get_config_value(_CONF_SECTION, 'database'),
}

# Paths to files.
_ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
_CHART_CONFIG_FILE = os.path.join(_ROOT_DIR, 'croschart_defaults.json')
_DEST_DATA_DIR = os.path.join(_ROOT_DIR, 'data')
_COMPLETED_ID_FILE = os.path.join(_DEST_DATA_DIR, 'job_id_complete.txt')
# Use basename() so an absolute __file__ doesn't silently override
# _DEST_DATA_DIR in the join.
_CURR_PID_FILE = os.path.join(
    _DEST_DATA_DIR, os.path.basename(__file__) + '.curr_pid.txt')
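
# Data layout: perf values for each (test, platform) pair are appended to
# data/<test_name>/<platform>.txt, one JSON line per job of the form
#   [job_id, job_name, platform, [[perf_key, perf_value], ...]]
# and _COMPLETED_ID_FILE accumulates one processed job ID per line.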

# Values that can be configured through options.
_NUM_DAYS_BACK = 7  # Ignore perf test runs in the database that finished
                    # more than this many days ago.

# Other values that can only be configured here in the code.
_AFE = frontend.AFE()
_PLATFORMS = [label.name for label in _AFE.get_labels(platform=True)]
# Job names report 'snow' devices under the 'daisy' build name, so track
# that platform as 'daisy'.
if 'snow' in _PLATFORMS:
    _PLATFORMS.remove('snow')
    _PLATFORMS.append('daisy')


def get_job_ids(cursor, test_name, oldest_db_lookup_date, completed_ids):
    """Gets all job IDs for the given test that haven't yet been processed.

    @param cursor: see query_database().
    @param test_name: see query_database().
    @param oldest_db_lookup_date: see query_database().
    @param completed_ids: see query_database().

    @return A list of string job IDs from the database that should be
        processed.
    """
    query = ('SELECT DISTINCT afe_job_id '
             'FROM tko_perf_view_2 INNER JOIN tko_jobs USING (job_idx) '
             'INNER JOIN tko_status USING (status_idx) WHERE '
             'test_name = %s AND test_finished_time >= %s AND '
             'word != "RUNNING"')
    start_time = time.time()
    cursor.execute(query, (test_name, oldest_db_lookup_date))
    logging.debug('Extracted job IDs in %.2f seconds',
                  time.time() - start_time)
    job_ids = []
    for result_row in cursor:
        job_id = str(result_row[0])
        if job_id not in completed_ids:
            job_ids.append(job_id)
    return job_ids


def write_perf_info_to_disk(job_id, result_dict, test_dir):
    """Writes extracted perf data for the given job ID to disk.

    Also writes the job ID to disk to mark it as having been processed. Note
    that the written files are not protected against simultaneous access by
    multiple invocations of this script.

    @param job_id: The string job ID.
    @param result_dict: A dictionary of associated perf info to write to disk.
    @param test_dir: The string directory name in which to write perf data.
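
    Example of one line this appends (all values hypothetical):
        ["1234", "lumpy-release/R26-3520.0.0/bvt/platform_BootPerf", "lumpy",
         [["seconds_kernel_to_login", 8.4]]]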
    """
    result_out = [job_id, result_dict['job_name'], result_dict['platform']]
    perf_items = []
    for perf_key in result_dict['perf_keys']:
        for perf_val in result_dict['perf_keys'][perf_key]:
            perf_items.append((perf_key, perf_val))
    result_out.append(perf_items)
    file_name = os.path.join(test_dir, result_dict['platform'] + '.txt')
    with open(file_name, 'a') as fp:
        fp.write(simplejson.dumps(result_out) + '\n')

    with open(_COMPLETED_ID_FILE, 'a') as fp:
        fp.write(job_id + '\n')


def extract_perf_for_job_id(cursor, job_id, unexpected_job_names, test_dir):
    """Extracts perf data for a given job, then writes it to local text files.

    @param cursor: A MySQLdb.cursor object used for interacting with a
        database.
    @param job_id: The string job ID to process.
    @param unexpected_job_names: A set of job names encountered so far that are
        not associated with a known platform type.
    @param test_dir: The string directory name in which to write perf data.

    @return True if data for the specified job ID is written to disk, or
        False if not (False means the job ID is not associated with a known
        platform type).
    """
    query = ('SELECT job_name,iteration_key,iteration_value '
             'FROM tko_perf_view_2 INNER JOIN tko_jobs USING (job_idx) '
             'WHERE afe_job_id = %s')
    cursor.execute(query, (job_id,))

    result = {}
    for job_name, key, val in cursor:
        # The job_name string contains the platform name. The platform name
        # is always followed either by "-rX", where X is the milestone number
        # (legacy data in the database), or by "-release" (more recent data).
        # Jobs in which the platform name is followed by anything else (in
        # particular, "-paladin" runs) are not considered.
        #
        # TODO(dennisjeffrey): Simplify the code below once the following bug
        # is addressed to standardize the platform names: crosbug.com/38521.
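        # For example, 'lumpy-r26' and 'lumpy-release' both yield platform
        # 'lumpy', while 'lumpy-paladin' contains no '-r' and is skipped.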
        match = re.search(r'(\w+)-r', job_name)
        if not match:
            unexpected_job_names.add(job_name)
            continue
        # Only process jobs for known platforms.
        platform = match.group(1) if match.group(1) in _PLATFORMS else None
        if platform:
            result['job_name'] = job_name
            result['platform'] = platform
            result.setdefault('perf_keys', {})
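            # A perf key may recur across test iterations; accumulate every
            # value for the key rather than keeping only the first.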
            if key and val:
                result['perf_keys'].setdefault(key, [])
                result['perf_keys'][key].append(val)
        else:
            unexpected_job_names.add(job_name)

    if 'platform' not in result:
        return False

    write_perf_info_to_disk(job_id, result, test_dir)
    return True


def query_database(cursor, test_name, completed_ids, oldest_db_lookup_date):
    """Queries the database for perf values and stores them in local files.

    This function performs the work only for the specified test case.

    @param cursor: A MySQLdb.cursor object used for interacting with a
        database.
    @param test_name: The string name of a test case to process.
    @param completed_ids: A set of job IDs that have already been extracted
        from the database on previous runs.
    @param oldest_db_lookup_date: The oldest date (represented as a string)
        for which to consider perf values in the database.

    @return The number of new job IDs that have been extracted/processed.
    """
    test_dir = os.path.join(_DEST_DATA_DIR, test_name)
    if not os.path.isdir(test_dir):
        os.makedirs(test_dir)

    # Identify the job IDs that need to be processed.
    job_ids = get_job_ids(cursor, test_name, oldest_db_lookup_date,
                          completed_ids)

    # For each job ID, extract the perf values we need.
    unexpected_job_names = set()
    num_newly_added = 0
    for i, job_id in enumerate(job_ids):
        logging.debug('Processing job %d of %d', i + 1, len(job_ids))

        if extract_perf_for_job_id(cursor, job_id, unexpected_job_names,
                                   test_dir):
            completed_ids.add(job_id)
            num_newly_added += 1

    if unexpected_job_names:
        logging.debug('Job names skipped due to unexpected platform: %s',
                      list(unexpected_job_names))

    return num_newly_added


def extract_new_perf_data(cursor, options):
    """Extracts new perf data from the database into local text files.

    @param cursor: A MySQLdb.cursor object used for interacting with a
        database.
    @param options: An optparse.OptionParser options object.

    @return The number of new job IDs that have been extracted/processed.
    """
    with open(_CHART_CONFIG_FILE, 'r') as fp:
        charts = simplejson.loads(fp.read())

    # Compute the oldest date for the perf values that we want to consider.
    oldest_db_lookup_date = (
        datetime.date.today() -
        datetime.timedelta(days=options.num_days_back)).strftime('%Y-%m-%d')
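    # For example, with num_days_back=7 and a current date of 2013-02-13,
    # this yields '2013-02-06'.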

    logging.debug('Extracting job IDs from %s onwards.',
                  oldest_db_lookup_date)

    # Get unique test names.
    test_names = set()
    for c in charts:
        test_names.add(c['test_name'])
        if 'old_test_names' in c:
            test_names |= set(c['old_test_names'])

    # Get the set of already-completed job IDs so we don't re-fetch their
    # data.
    completed_ids = set()
    if os.path.isfile(_COMPLETED_ID_FILE):
        with open(_COMPLETED_ID_FILE, 'r') as fp:
            completed_ids = set(line.strip() for line in fp)

    num_newly_added = 0
    for i, test_name in enumerate(test_names):
        logging.info('Extracting info for test %d of %d: %s', i + 1,
                     len(test_names), test_name)

        num_newly_added += query_database(cursor, test_name, completed_ids,
                                          oldest_db_lookup_date)

    return num_newly_added


def cleanup():
    """Cleans up when this script is done."""
    if os.path.isfile(_CURR_PID_FILE):
        os.remove(_CURR_PID_FILE)


def main():
    """Main function."""
    parser = optparse.OptionParser()
    parser.add_option('-n', '--num-days-back', metavar='NUM_DAYS', type='int',
                      default=_NUM_DAYS_BACK,
                      help='Consider only perf test results computed within '
                           'this many days before now (if this script is '
                           'invoked daily, there is no need to consider '
                           'history from many days back). Defaults to '
                           '%default days back.')
    parser.add_option('-c', '--cloud-sql', action='store_true', default=False,
                      help='Connect to the chromeos-lab CloudSQL database, '
                           'rather than the original MySQL autotest '
                           'database.')
    parser.add_option('-v', '--verbose', action='store_true', default=False,
                      help='Use verbose logging.')
    options, _ = parser.parse_args()

    log_level = logging.DEBUG if options.verbose else logging.INFO
    logging.basicConfig(format='%(asctime)s %(levelname)s: %(message)s',
                        level=log_level)

    cursor = None
    if options.cloud_sql:
        # CloudSQL.
        logging.debug('Connecting to CloudSQL...')
        if rdbms_googleapi is None:
            logging.error('CloudSQL requested, but cannot locate CloudSQL '
                          'dependencies. Have you set up CloudSQL on this '
                          'machine?')
            sys.exit(1)
        db = rdbms_googleapi.connect(None,
                                     instance='chromeos-bot:chromeos-lab')
        cursor = db.cursor()
        cursor.execute('USE chromeos_autotest_db')
    else:
        # Autotest MySQL database.
        logging.debug('Connecting to Autotest MySQL database...')
        if MySQLdb is None:
            logging.error('MySQL requested, but cannot locate MySQL '
                          'dependencies. Have you set up MySQL on this '
                          'machine?')
            sys.exit(1)
        db = MySQLdb.connect(**_MYSQL_READONLY_LOGIN_CREDENTIALS)
        cursor = db.cursor()

    logging.debug('Database connection complete.')

    if not os.path.isdir(_DEST_DATA_DIR):
        os.makedirs(_DEST_DATA_DIR)

    common.die_if_already_running(_CURR_PID_FILE, logging)
    num_newly_added = extract_new_perf_data(cursor, options)
    cleanup()
    logging.info('Done! Added info for %d new job IDs', num_newly_added)


if __name__ == '__main__':
    main()