Blame - frontend/perf-dashboard/extract_perf.py - platform/external/autotest

Dennis Jeffrey

b95ba5a

2012-11-12 17:55:18 -0800

[diff] [blame]

1

#!/usr/bin/env python

2

3

# Use of this source code is governed by a BSD-style license that can be

4

# found in the LICENSE file.

5

6

"""Extracts perf keys from autotest database and writes to local data files.

7

8

This script keeps track of the job IDs for which perf keys have already been

9

extracted on previous runs. The script only processes those job IDs whose perf

10

values haven't yet been previously extracted.

11

12

Sample usage:

13

python extract_perf.py -v

14

15

Run with -h to see the full set of command-line options.

Dennis Jeffrey

8a30538

2013-02-28 09:08:57 -0800

[diff] [blame]

16

Dennis Jeffrey

b95ba5a

2012-11-12 17:55:18 -0800

[diff] [blame]

"""

import datetime

import logging

import optparse

import os

import re

import simplejson

import sys

import time

import common

from autotest_lib.client.common_lib import global_config

30

from autotest_lib.server import frontend

31

32

try:

33

from google.storage.speckle.python.api import rdbms_googleapi

34

except ImportError:

35

# Download the AppEngine SDK if desired from here:

36

# https://developers.google.com/appengine/downloads

37

rdbms_googleapi = None

try:

import MySQLdb

except ImportError:

MySQLdb = None

_GLOBAL_CONF = global_config.global_config

45

_CONF_SECTION = 'AUTOTEST_WEB'

46

47

_MYSQL_READONLY_LOGIN_CREDENTIALS = {

48

'host': _GLOBAL_CONF.get_config_value(_CONF_SECTION, 'readonly_host'),

49

'user': _GLOBAL_CONF.get_config_value(_CONF_SECTION, 'readonly_user'),

50

'passwd': _GLOBAL_CONF.get_config_value(_CONF_SECTION, 'readonly_password'),

51

'db': _GLOBAL_CONF.get_config_value(_CONF_SECTION, 'database'),

52

}

53

Dennis Jeffrey

b95ba5a

2012-11-12 17:55:18 -0800

[diff] [blame]

54

_ROOT_DIR = os.path.dirname(os.path.abspath(__file__))

55

_CHART_CONFIG_FILE = os.path.join(_ROOT_DIR, 'croschart_defaults.json')

Dennis Jeffrey

8a30538

2013-02-28 09:08:57 -0800

[diff] [blame]

56

_COMPLETED_ID_FILE_NAME = 'job_id_complete.txt'

57

_CURR_PID_FILE_NAME = __file__ + '.curr_pid.txt'

Dennis Jeffrey

b95ba5a

2012-11-12 17:55:18 -0800

[diff] [blame]

58

59

# Values that can be configured through options.

60

_NUM_DAYS_BACK = 7 # Ignore perf test runs in database that finished more than

61

# this many days ago.

Dennis Jeffrey

8a30538

2013-02-28 09:08:57 -0800

[diff] [blame]

62

_DEST_DATA_DIR = _ROOT_DIR

Dennis Jeffrey

b95ba5a

2012-11-12 17:55:18 -0800

[diff] [blame]

63

64

# Other values that can only be configured here in the code.

65

_AFE = frontend.AFE()

66

_PLATFORMS = map(lambda x: x.name, _AFE.get_labels(platform=True))

67

if 'snow' in _PLATFORMS:

68

_PLATFORMS.remove('snow')

69

_PLATFORMS.append('daisy')

70

71

72

def get_job_ids(cursor, test_name, oldest_db_lookup_date, completed_ids):

73

"""Gets all job IDs for the given test name that haven't yet been processed.

74

75

@param cursor: see query_database().

76

@param test_name: see query_database().

77

@param oldest_db_lookup_date: see query_database().

78

@param completed_ids: see query_database().

79

80

@return A list of string job IDs from the database that should be processed.

Dennis Jeffrey

8a30538

2013-02-28 09:08:57 -0800

[diff] [blame]

81

Dennis Jeffrey

b95ba5a

2012-11-12 17:55:18 -0800

[diff] [blame]

82

"""

83

query = ('SELECT DISTINCT afe_job_id '

84

'FROM tko_perf_view_2 INNER JOIN tko_jobs USING (job_idx) '

85

'INNER JOIN tko_status USING (status_idx) WHERE '

86

'test_name = %s AND test_finished_time >= %s AND '

87

'word != "RUNNING"')

88

start_time = time.time()

89

cursor.execute(query, (test_name, oldest_db_lookup_date))

90

logging.debug('Extracted job IDs in %.2f seconds', time.time() - start_time)

91

job_ids = []

92

for result_row in cursor:

93

job_id = str(result_row[0])

94

if job_id not in completed_ids:

95

job_ids.append(job_id)

return job_ids

Dennis Jeffrey

2013-02-28 09:08:57 -0800

[diff] [blame]

99

def write_perf_info_to_disk(job_id, result_dict, test_dir, output_dir):

Dennis Jeffrey

b95ba5a

2012-11-12 17:55:18 -0800

[diff] [blame]

100

"""Writes extracted perf data for the given job ID to disk.

101

102

Also writes the job ID to disk to mark it as having been processed. Note

103

that the written files are not protected against simultaneous access by

104

multiple invocations of this script.

105

106

@param job_id: The string job ID.

107

@param result_dict: A dictionary of associated perf info to write to disk.

108

@param test_dir: The string directory name in which to write perf data.

Dennis Jeffrey

8a30538

2013-02-28 09:08:57 -0800

[diff] [blame]

109

@param output_dir: The output directory in which results are being written.

110

Dennis Jeffrey

b95ba5a

2012-11-12 17:55:18 -0800

[diff] [blame]

111

"""

Dennis Jeffrey

9fece30

2013-03-26 12:08:06 -0700

[diff] [blame]

112

result_out = [job_id, result_dict['job_name'], result_dict['platform'],

113

result_dict['chrome_version']]

Dennis Jeffrey

b95ba5a

2012-11-12 17:55:18 -0800

[diff] [blame]

114

perf_items = []

115

for perf_key in result_dict['perf_keys']:

116

for perf_val in result_dict['perf_keys'][perf_key]:

117

perf_items.append((perf_key, perf_val))

118

result_out.append(perf_items)

119

file_name = os.path.join(test_dir, result_dict['platform'] + '.txt')

120

with open(file_name, 'a') as fp:

121

fp.write(simplejson.dumps(result_out) + '\n')

122

Dennis Jeffrey

8a30538

2013-02-28 09:08:57 -0800

[diff] [blame]

123

with open(os.path.join(output_dir, _COMPLETED_ID_FILE_NAME), 'a') as fp:

Dennis Jeffrey

b95ba5a

2012-11-12 17:55:18 -0800

[diff] [blame]

124

fp.write(job_id + '\n')

125

126

Dennis Jeffrey

8a30538

2013-02-28 09:08:57 -0800

[diff] [blame]

127

def extract_perf_for_job_id(cursor, job_id, unexpected_job_names, test_dir,

128

output_dir):

Dennis Jeffrey

b95ba5a

2012-11-12 17:55:18 -0800

[diff] [blame]

129

"""Extracts perf data for a given job, then writes to local text files.

130

131

@param cursor: A MySQLdb.cursor object used for interacting with a database.

132

@param job_id: The string job ID to process.

133

@param unexpected_job_names: A set of job names encountered so far that are

134

not associated with a known platform type.

135

@param test_dir: The string directory name in which to write perf data.

Dennis Jeffrey

8a30538

2013-02-28 09:08:57 -0800

[diff] [blame]

136

@param output_dir: The output directory in which results are being written.

Dennis Jeffrey

b95ba5a

2012-11-12 17:55:18 -0800

[diff] [blame]

137

138

@return True, if data for the specified job ID is written to disk, or

139

False if not (will be False if the job ID is not associated with a known

140

platform type).

Dennis Jeffrey

8a30538

2013-02-28 09:08:57 -0800

[diff] [blame]

141

Dennis Jeffrey

b95ba5a

2012-11-12 17:55:18 -0800

[diff] [blame]

142

"""

143

query = ('SELECT job_name,iteration_key,iteration_value '

144

'FROM tko_perf_view_2 INNER JOIN tko_jobs USING (job_idx) '

145

'WHERE afe_job_id = %s')

146

cursor.execute(query, job_id)

147

148

result = {}

149

for job_name, key, val in cursor:

150

# The job_name string contains the platform name. The platform name is

151

# always followed by either "-rX", where X is the milestone number

152

# (this is from legacy data in the database), or else it is followed

153

# by "-release" (for more recent data in the database). We do not

154

# consider jobs in which the platform name is followed by anything

155

# else (in particular, "-paladin" runs).

156

#

157

# TODO(dennisjeffrey): Simplify the below code once the following bug

158

# is addressed to standardize the platform names: crosbug.com/38521.

159

match = re.search('(\w+)-r', job_name)

Dennis Jeffrey

bbe98a2

2013-02-13 09:59:38 -0800

[diff] [blame]

160

if not match:

161

unexpected_job_names.add(job_name)

162

continue

Dennis Jeffrey

b95ba5a

2012-11-12 17:55:18 -0800

[diff] [blame]

163

# Only process jobs for known platforms.

164

platform = match.group(1) if match.group(1) in _PLATFORMS else None

165

if platform:

166

result['job_name'] = job_name

167

result['platform'] = platform

168

result.setdefault('perf_keys', {})

169

if key and val:

170

result['perf_keys'].setdefault(key, [])

171

result['perf_keys'][key].append(val)

172

else:

173

unexpected_job_names.add(job_name)

174

175

if 'platform' not in result:

176

return False

177

Dennis Jeffrey

9fece30

2013-03-26 12:08:06 -0700

[diff] [blame]

178

# Get the Chrome version number associated with this job ID.

179

query = ('SELECT DISTINCT value FROM tko_test_attributes '

180

'INNER JOIN tko_perf_view_2 USING (test_idx) '

181

'INNER JOIN tko_jobs USING (job_idx) '

182

'WHERE afe_job_id=%s AND attribute="CHROME_VERSION"')

183

cursor.execute(query, job_id)

184

cursor_results = cursor.fetchall()

185

assert len(cursor_results) <= 1, \

186

'Expected the Chrome version number to be unique for afe_job_id ' \

187

'%s, but got multiple instead: %s' % (job_id, cursor_results)

188

result['chrome_version'] = cursor_results[0][0] if cursor_results else ''

189

Dennis Jeffrey

8a30538

2013-02-28 09:08:57 -0800

[diff] [blame]

190

write_perf_info_to_disk(job_id, result, test_dir, output_dir)

Dennis Jeffrey

b95ba5a

2012-11-12 17:55:18 -0800

[diff] [blame]

return True

Dennis Jeffrey

2013-02-28 09:08:57 -0800

[diff] [blame]

194

def query_database(cursor, test_name, completed_ids, oldest_db_lookup_date,

195

output_dir):

Dennis Jeffrey

b95ba5a

2012-11-12 17:55:18 -0800

[diff] [blame]

196

"""Queries database for perf values and stores them into local text files.

197

198

This function performs the work only for the specified test case.

199

200

@param cursor: A MySQLdb.cursor object used for interacting with a database.

201

@param test_name: The string name of a test case to process.

202

@param completed_ids: A set of job IDs that have already been previously

203

extracted from the database.

204

@param oldest_db_lookup_date: The oldest date (represented as a string) for

205

which we want to consider perf values in the database.

Dennis Jeffrey

8a30538

2013-02-28 09:08:57 -0800

[diff] [blame]

206

@param output_dir: The output directory in which results are being written.

Dennis Jeffrey

b95ba5a

2012-11-12 17:55:18 -0800

[diff] [blame]

207

208

@return The number of new job IDs that have been extracted/processed.

Dennis Jeffrey

8a30538

2013-02-28 09:08:57 -0800

[diff] [blame]

209

Dennis Jeffrey

b95ba5a

2012-11-12 17:55:18 -0800

[diff] [blame]

210

"""

Dennis Jeffrey

8a30538

2013-02-28 09:08:57 -0800

[diff] [blame]

211

test_dir = os.path.join(output_dir, test_name)

Dennis Jeffrey

b95ba5a

2012-11-12 17:55:18 -0800

[diff] [blame]

212

if not os.path.isdir(test_dir):

213

os.makedirs(test_dir)

214

215

# Identify the job IDs that need to be processed.

216

job_ids = get_job_ids(cursor, test_name, oldest_db_lookup_date,

217

completed_ids)

218

219

# For each job ID, extract the perf values we need.

220

unexpected_job_names = set()

221

num_newly_added = 0

222

for i, job_id in enumerate(job_ids):

223

logging.debug('Processing job %d of %d', i + 1, len(job_ids))

224

225

if extract_perf_for_job_id(cursor, job_id, unexpected_job_names,

Dennis Jeffrey

8a30538

2013-02-28 09:08:57 -0800

[diff] [blame]

226

test_dir, output_dir):

Dennis Jeffrey

b95ba5a

2012-11-12 17:55:18 -0800

[diff] [blame]

227

completed_ids.add(job_id)

228

num_newly_added += 1

229

230

if unexpected_job_names:

231

logging.debug('Job names skipped due to unexpected platform: %s',

232

list(unexpected_job_names))

233

234

return num_newly_added

235

236

Dennis Jeffrey

8a30538

2013-02-28 09:08:57 -0800

[diff] [blame]

237

def extract_new_perf_data(cursor, output_dir, options):

Dennis Jeffrey

b95ba5a

2012-11-12 17:55:18 -0800

[diff] [blame]

238

"""Extracts new perf data from database and writes data to local text files.

239

240

@param cursor: A MySQLdb.cursor object used for interacting with a database.

Dennis Jeffrey

8a30538

2013-02-28 09:08:57 -0800

[diff] [blame]

241

@param output_dir: The output directory in which results are being written.

Dennis Jeffrey

b95ba5a

2012-11-12 17:55:18 -0800

[diff] [blame]

242

@param options: An optparse.OptionParser options object.

243

244

@return The number of new job IDs that have been extracted/processed.

Dennis Jeffrey

8a30538

2013-02-28 09:08:57 -0800

[diff] [blame]

245

Dennis Jeffrey

b95ba5a

2012-11-12 17:55:18 -0800

[diff] [blame]

246

"""

247

charts = {}

248

with open(_CHART_CONFIG_FILE, 'r') as fp:

249

charts = simplejson.loads(fp.read())

250

251

# Compute the oldest date for the perf values that we want to consider.

252

oldest_db_lookup_date = (

253

datetime.date.today() -

254

datetime.timedelta(days=options.num_days_back)).strftime('%Y-%m-%d')

255

256

logging.debug('Extracting job IDs from %s onwards.',

257

oldest_db_lookup_date)

258

259

# Get unique test names.

Dennis Jeffrey

bbe98a2

2013-02-13 09:59:38 -0800

[diff] [blame]

260

test_names = set()

261

for c in charts:

262

test_names.add(c['test_name'])

263

if 'old_test_names' in c:

264

test_names |= set(c['old_test_names'])

Dennis Jeffrey

b95ba5a

2012-11-12 17:55:18 -0800

[diff] [blame]

265

266

# Get list of already-completed job IDs so we don't re-fetch their data.

267

completed_ids = set()

Dennis Jeffrey

8a30538

2013-02-28 09:08:57 -0800

[diff] [blame]

268

completed_id_file = os.path.join(output_dir, _COMPLETED_ID_FILE_NAME)

269

if os.path.isfile(completed_id_file):

270

with open(completed_id_file, 'r') as fp:

Dennis Jeffrey

b95ba5a

2012-11-12 17:55:18 -0800

[diff] [blame]

271

job_ids = map(lambda x: x.strip(), fp.readlines())

272

for job_id in job_ids:

273

completed_ids.add(job_id)

274

275

num_newly_added = 0

276

for i, test_name in enumerate(test_names):

277

logging.info('Extracting info for test %d of %d: %s ', i + 1,

278

len(test_names), test_name)

279

280

num_newly_added += query_database(cursor, test_name, completed_ids,

Dennis Jeffrey

8a30538

2013-02-28 09:08:57 -0800

[diff] [blame]

281

oldest_db_lookup_date, output_dir)

Dennis Jeffrey

b95ba5a

2012-11-12 17:55:18 -0800

[diff] [blame]

282

283

return num_newly_added

284

285

Dennis Jeffrey

8a30538

2013-02-28 09:08:57 -0800

[diff] [blame]

286

def cleanup(output_dir):

287

"""Cleans up when this script is done.

288

289

@param output_dir: The output directory in which results are being written.

290

291

"""

292

curr_pid_file = os.path.join(output_dir, _CURR_PID_FILE_NAME)

293

if os.path.isfile(curr_pid_file):

294

os.remove(curr_pid_file)

Dennis Jeffrey

b95ba5a

2012-11-12 17:55:18 -0800

[diff] [blame]

def main():

"""Main function."""

parser = optparse.OptionParser()

300

parser.add_option('-n', '--num-days-back', metavar='NUM_DAYS', type='int',

301

default=_NUM_DAYS_BACK,

302

help='Consider only the perf test results that were '

303

'computed within this many days ago (if this script '

304

'is invoked daily, no need to consider history from '

305

'many days back). Defaults to %default days back.')

Dennis Jeffrey

8a30538

2013-02-28 09:08:57 -0800

[diff] [blame]

306

parser.add_option('-o', '--output-dir', metavar='DIR', type='string',

307

default=_DEST_DATA_DIR,

308

help='Absolute path to the output directory in which to '

309

'store the raw perf data extracted from the '

310

'database. Will be written into a subfolder named '

311

'"data". Defaults to "%default".')

Dennis Jeffrey

b95ba5a

2012-11-12 17:55:18 -0800

[diff] [blame]

312

parser.add_option('-c', '--cloud-sql', action='store_true', default=False,

313

help='Connect to the chromeos-lab CloudSQL database, '

314

'rather than the original MySQL autotest database.')

315

parser.add_option('-v', '--verbose', action='store_true', default=False,

316

help='Use verbose logging.')

317

options, _ = parser.parse_args()

318

319

log_level = logging.DEBUG if options.verbose else logging.INFO

320

logging.basicConfig(format='%(asctime)s %(levelname)s: %(message)s',

level=log_level)

cursor = None

if options.cloud_sql:

325

# CloudSQL.

326

logging.debug('Connecting to CloudSQL...')

327

if rdbms_googleapi is None:

328

logging.error('CloudSQL requested, but cannot locate CloudSQL '

329

'dependencies. Have you set up CloudSQL on this '

330

'machine?')

331

sys.exit(1)

332

db = rdbms_googleapi.connect(None,

333

instance='chromeos-bot:chromeos-lab')

334

cursor = db.cursor()

335

cursor.execute('USE chromeos_autotest_db')

336

else:

337

# Autotest MySQL database.

338

logging.debug('Connecting to Autotest MySQL database...')

339

if MySQLdb is None:

340

logging.error('MySQL requested, but cannot locate MySQL '

341

'dependencies. Have you set up MySQL on this '

342

'machine?')

343

sys.exit(1)

344

db = MySQLdb.connect(**_MYSQL_READONLY_LOGIN_CREDENTIALS)

345

cursor = db.cursor()

346

347

logging.debug('Database connection complete.')

348

Dennis Jeffrey

8a30538

2013-02-28 09:08:57 -0800

[diff] [blame]

349

output_dir = os.path.join(options.output_dir, 'data')

350

if not os.path.isdir(output_dir):

351

os.makedirs(output_dir)

Dennis Jeffrey

b95ba5a

2012-11-12 17:55:18 -0800

[diff] [blame]

352

Dennis Jeffrey

8a30538

2013-02-28 09:08:57 -0800

[diff] [blame]

353

common.die_if_already_running(

354

os.path.join(output_dir, _CURR_PID_FILE_NAME), logging)

355

num_newly_added = extract_new_perf_data(cursor, output_dir, options)

356

cleanup(output_dir)

Dennis Jeffrey

b95ba5a

2012-11-12 17:55:18 -0800

[diff] [blame]

357

logging.info('Done! Added info for %d new job IDs', num_newly_added)

358

359

360

if __name__ == '__main__':

361

main()