blob: c3a9a90ecbbaf172f3e66f2212777332fc1bad1b [file] [log] [blame]
mbligh96cf0512008-04-17 15:25:38 +00001#!/usr/bin/python -u
mblighc2514542008-02-19 15:54:26 +00002
Aviv Keshet687d2dc2016-10-20 15:41:16 -07003import collections
Fang Deng49822682014-10-21 16:29:22 -07004import datetime
Aviv Keshet687d2dc2016-10-20 15:41:16 -07005import errno
6import fcntl
Simran Basi1e10e922015-04-16 15:09:56 -07007import json
Aviv Keshet687d2dc2016-10-20 15:41:16 -07008import optparse
9import os
10import socket
Shuqian Zhao31425d52016-12-07 09:35:03 -080011import subprocess
Aviv Keshet687d2dc2016-10-20 15:41:16 -070012import sys
Dan Shi11e35062017-11-03 10:09:05 -070013import time
Aviv Keshet687d2dc2016-10-20 15:41:16 -070014import traceback
mblighbb7b8912006-10-08 03:59:02 +000015
mbligh96cf0512008-04-17 15:25:38 +000016import common
Dan Shi4f8c0242017-07-07 15:34:49 -070017from autotest_lib.client.bin.result_tools import utils as result_utils
18from autotest_lib.client.bin.result_tools import utils_lib as result_utils_lib
19from autotest_lib.client.bin.result_tools import runner as result_runner
20from autotest_lib.client.common_lib import control_data
Benny Peakefeb775c2017-02-08 15:14:14 -080021from autotest_lib.client.common_lib import global_config
jadmanskidb4f9b52008-12-03 22:52:53 +000022from autotest_lib.client.common_lib import mail, pidfile
Fang Deng49822682014-10-21 16:29:22 -070023from autotest_lib.client.common_lib import utils
Fang Deng49822682014-10-21 16:29:22 -070024from autotest_lib.frontend import setup_django_environment
Fang Deng9ec66802014-04-28 19:04:33 +000025from autotest_lib.frontend.tko import models as tko_models
Shuqian Zhao19e62fb2017-01-09 10:10:14 -080026from autotest_lib.server import site_utils
Fang Deng49822682014-10-21 16:29:22 -070027from autotest_lib.server.cros.dynamic_suite import constants
28from autotest_lib.site_utils import job_overhead
Benny Peaked322d3d2017-02-08 15:39:28 -080029from autotest_lib.site_utils.sponge_lib import sponge_utils
Dennis Jeffreyf9bef6c2013-08-05 11:01:27 -070030from autotest_lib.tko import db as tko_db, utils as tko_utils
Luigi Semenzatoe7064812017-02-03 14:47:59 -080031from autotest_lib.tko import models, parser_lib
Dennis Jeffreyf9bef6c2013-08-05 11:01:27 -070032from autotest_lib.tko.perf_upload import perf_uploader
mbligh74fc0462007-11-05 20:24:17 +000033
Dan Shib0af6212017-07-17 14:40:02 -070034try:
35 from chromite.lib import metrics
36except ImportError:
37 metrics = utils.metrics_mock
38
39
# Bundle of command-line flags threaded from main() down into parse_one();
# see parse_args() for the meaning of each field.
_ParseOptions = collections.namedtuple(
        'ParseOptions', ['reparse', 'mail_on_failure', 'dry_run', 'suite_report',
                         'datastore_creds', 'export_to_gcloud_path'])
Aviv Keshet687d2dc2016-10-20 15:41:16 -070043
def parse_args():
    """Parse command line arguments.

    @return: (options, args) tuple where args is the non-empty list of
            results directories to parse. Exits the process with status 1
            if no results directory was supplied.
    """
    # build up our options parser and parse sys.argv
    parser = optparse.OptionParser()
    parser.add_option("-m", help="Send mail for FAILED tests",
                      dest="mailit", action="store_true")
    parser.add_option("-r", help="Reparse the results of a job",
                      dest="reparse", action="store_true")
    parser.add_option("-o", help="Parse a single results directory",
                      dest="singledir", action="store_true")
    parser.add_option("-l", help=("Levels of subdirectories to include "
                                  "in the job name"),
                      type="int", dest="level", default=1)
    parser.add_option("-n", help="No blocking on an existing parse",
                      dest="noblock", action="store_true")
    parser.add_option("-s", help="Database server hostname",
                      dest="db_host", action="store")
    parser.add_option("-u", help="Database username", dest="db_user",
                      action="store")
    parser.add_option("-p", help="Database password", dest="db_pass",
                      action="store")
    parser.add_option("-d", help="Database name", dest="db_name",
                      action="store")
    parser.add_option("--dry-run", help="Do not actually commit any results.",
                      dest="dry_run", action="store_true", default=False)
    parser.add_option(
            "--detach", action="store_true",
            help="Detach parsing process from the caller process. Used by "
                 "monitor_db to safely restart without affecting parsing.",
            default=False)
    parser.add_option("--write-pidfile",
                      help="write pidfile (.parser_execute)",
                      dest="write_pidfile", action="store_true",
                      default=False)
    parser.add_option("--record-duration",
                      help="[DEPRECATED] Record timing to metadata db",
                      dest="record_duration", action="store_true",
                      default=False)
    parser.add_option("--suite-report",
                      help=("Allows parsing job to attempt to create a suite "
                            "timeline report, if it detects that the job being "
                            "parsed is a suite job."),
                      dest="suite_report", action="store_true",
                      default=False)
    parser.add_option("--datastore-creds",
                      help=("The path to gcloud datastore credentials file, "
                            "which will be used to upload suite timeline "
                            "report to gcloud. If not specified, the one "
                            "defined in shadow_config will be used."),
                      dest="datastore_creds", action="store", default=None)
    parser.add_option("--export-to-gcloud-path",
                      help=("The path to export_to_gcloud script. Please find "
                            "chromite path on your server. The script is under "
                            "chromite/bin/."),
                      dest="export_to_gcloud_path", action="store",
                      default=None)
    options, args = parser.parse_args()

    # we need a results directory
    if len(args) == 0:
        tko_utils.dprint("ERROR: at least one results directory must "
                         "be provided")
        parser.print_help()
        sys.exit(1)

    # Fall back to the shadow_config-defined datastore credentials when the
    # flag was not given on the command line.
    if not options.datastore_creds:
        gcloud_creds = global_config.global_config.get_config_value(
            'GCLOUD', 'cidb_datastore_writer_creds', default=None)
        options.datastore_creds = (site_utils.get_creds_abspath(gcloud_creds)
                                   if gcloud_creds else None)

    # Locate the export_to_gcloud helper script when no path was given,
    # probing the known lab-server and workstation locations in order.
    if not options.export_to_gcloud_path:
        export_script = 'chromiumos/chromite/bin/export_to_gcloud'
        # If it is a lab server, the script is under ~chromeos-test/
        if os.path.exists(os.path.expanduser('~chromeos-test/%s' %
                                             export_script)):
            path = os.path.expanduser('~chromeos-test/%s' % export_script)
        # If it is a local workstation, it is probably under ~/
        elif os.path.exists(os.path.expanduser('~/%s' % export_script)):
            path = os.path.expanduser('~/%s' % export_script)
        # If it is not found anywhere, the default will be set to None.
        else:
            path = None
        options.export_to_gcloud_path = path

    # pass the options back
    return options, args
mbligh74fc0462007-11-05 20:24:17 +0000131
132
def format_failure_message(jobname, kernel, testname, status, reason):
    """Format one fixed-width row of the failure report table.

    @param jobname: String representing the job name.
    @param kernel: String representing the kernel.
    @param testname: String representing the test name.
    @param status: String representing the test status.
    @param reason: String representing the reason.

    @return: Failure message as a string.
    """
    # Left-justified columns: job(12), kernel(20), test(12), status(10),
    # then the free-form reason.
    row = (jobname, kernel, testname, status, reason)
    return "%-12s %-20s %-12s %-10s %s" % row
mblighb85e6b02006-10-08 17:20:56 +0000146
mblighbb7b8912006-10-08 03:59:02 +0000147
def mailfailure(jobname, job, message):
    """Send an email about the failure.

    @param jobname: String representing the job name.
    @param job: A job object.
    @param message: The message to mail.
    """
    # Build the report header: a link to the results followed by a
    # column-header row and an underline row for the failure table.
    header_lines = [
        "",
        "The following tests FAILED for this job",
        "http://%s/results/%s" % (socket.gethostname(), jobname),
        "",
        format_failure_message("Job name", "Kernel", "Test name",
                               "FAIL/WARN", "Failure reason"),
        format_failure_message("=" * 8, "=" * 6, "=" * 8,
                               "=" * 8, "=" * 14),
    ]
    subject = "AUTOTEST: FAILED tests from job %s" % jobname
    mail.send("", job.user, "", subject, "\n".join(header_lines) + message)
mbligh006f2302007-09-13 20:46:46 +0000169
170
def _invalidate_original_tests(orig_job_idx, retry_job_idx):
    """Retry tests invalidates original tests.

    Whenever a retry job is complete, we want to invalidate the original
    job's test results, such that the consumers of the tko database
    (e.g. tko frontend, wmatrix) could figure out which results are the latest.

    When a retry job is parsed, we retrieve the original job's afe_job_id
    from the retry job's keyvals, which is then converted to tko job_idx and
    passed into this method as |orig_job_idx|.

    In this method, we are going to invalidate the rows in tko_tests that are
    associated with the original job by flipping their 'invalid' bit to True.
    In addition, in tko_tests, we also maintain a pointer from the retry results
    to the original results, so that later we can always know which rows in
    tko_tests are retries and which are the corresponding original results.
    This is done by setting the field 'invalidates_test_idx' of the tests
    associated with the retry job.

    For example, assume Job(job_idx=105) are retried by Job(job_idx=108), after
    this method is run, their tko_tests rows will look like:
    __________________________________________________________________________
    test_idx| job_idx | test            | ... | invalid | invalidates_test_idx
    10      | 105     | dummy_Fail.Error| ... | 1       | NULL
    11      | 105     | dummy_Fail.Fail | ... | 1       | NULL
    ...
    20      | 108     | dummy_Fail.Error| ... | 0       | 10
    21      | 108     | dummy_Fail.Fail | ... | 0       | 11
    __________________________________________________________________________
    Note the invalid bits of the rows for Job(job_idx=105) are set to '1'.
    And the 'invalidates_test_idx' fields of the rows for Job(job_idx=108)
    are set to 10 and 11 (the test_idx of the rows for the original job).

    @param orig_job_idx: An integer representing the original job's
                         tko job_idx. Tests associated with this job will
                         be marked as 'invalid'.
    @param retry_job_idx: An integer representing the retry job's
                          tko job_idx. The field 'invalidates_test_idx'
                          of the tests associated with this job will be updated.

    """
    msg = 'orig_job_idx: %s, retry_job_idx: %s' % (orig_job_idx, retry_job_idx)
    if not orig_job_idx or not retry_job_idx:
        tko_utils.dprint('ERROR: Could not invalidate tests: ' + msg)
        # BUGFIX: bail out here. Previously the function fell through and
        # ran the queries below against a missing job index, then logged a
        # misleading "Invalidated tests" debug message.
        return
    # Using django models here makes things easier, but make sure that
    # before this method is called, all other relevant transactions have been
    # committed to avoid race condition. In the long run, we might consider
    # to make the rest of parser use django models.
    orig_tests = tko_models.Test.objects.filter(job__job_idx=orig_job_idx)
    retry_tests = tko_models.Test.objects.filter(job__job_idx=retry_job_idx)

    # Invalidate original tests.
    orig_tests.update(invalid=True)

    # Maintain a dictionary that maps (test, subdir) to original tests.
    # Note that within the scope of a job, (test, subdir) uniquelly
    # identifies a test run, but 'test' does not.
    # In a control file, one could run the same test with different
    # 'subdir_tag', for example,
    #   job.run_test('dummy_Fail', tag='Error', subdir_tag='subdir_1')
    #   job.run_test('dummy_Fail', tag='Error', subdir_tag='subdir_2')
    # In tko, we will get
    #   (test='dummy_Fail.Error', subdir='dummy_Fail.Error.subdir_1')
    #   (test='dummy_Fail.Error', subdir='dummy_Fail.Error.subdir_2')
    invalidated_tests = {(orig_test.test, orig_test.subdir): orig_test
                         for orig_test in orig_tests}
    for retry in retry_tests:
        # It is possible that (retry.test, retry.subdir) doesn't exist
        # in invalidated_tests. This could happen when the original job
        # didn't run some of its tests. For example, a dut goes offline
        # since the beginning of the job, in which case invalidated_tests
        # will only have one entry for 'SERVER_JOB'.
        orig_test = invalidated_tests.get((retry.test, retry.subdir), None)
        if orig_test:
            retry.invalidates_test = orig_test
            retry.save()
    tko_utils.dprint('DEBUG: Invalidated tests associated to job: ' + msg)
248
249
def _throttle_result_size(path):
    """Limit the total size of test results for the given path.

    @param path: Path of the result directory.
    """
    if not result_runner.ENABLE_RESULT_THROTTLING:
        tko_utils.dprint(
                'Result throttling is not enabled. Skipping throttling %s' %
                path)
        return

    max_result_size_KB = control_data.DEFAULT_MAX_RESULT_SIZE_KB
    # Client side test saves the test control to file `control`, while server
    # side test saves the test control to file `control.srv`
    for control_file in ['control', 'control.srv']:
        control = os.path.join(path, control_file)
        try:
            max_result_size_KB = control_data.parse_control(
                    control, raise_warnings=False).max_result_size_KB
            # Any value different from the default is considered to be the one
            # set in the test control file.
            if max_result_size_KB != control_data.DEFAULT_MAX_RESULT_SIZE_KB:
                break
        except IOError as e:
            tko_utils.dprint(
                    'Failed to access %s. Error: %s\nDetails %s' %
                    (control, e, traceback.format_exc()))
        except control_data.ControlVariableException as e:
            tko_utils.dprint(
                    'Failed to parse %s. Error: %s\nDetails %s' %
                    (control, e, traceback.format_exc()))

    try:
        result_utils.execute(path, max_result_size_KB)
    except Exception:
        # Throttling is best-effort: log and continue so parsing is not
        # aborted. BUGFIX: was a bare `except:`, which also swallowed
        # SystemExit/KeyboardInterrupt; narrowed to Exception.
        tko_utils.dprint(
                'Failed to throttle result size of %s.\nDetails %s' %
                (path, traceback.format_exc()))
288
289
def export_tko_job_to_file(job, jobname, filename):
    """Exports the tko job to disk file.

    @param job: database object.
    @param jobname: the job name as string.
    @param filename: The path to the results to be parsed.
    """
    try:
        # Deferred import: job_serializer depends on the generated protobuf
        # module (tko_pb2), which may not be present on this machine.
        from autotest_lib.tko import job_serializer
        job_serializer.JobSerializer().serialize_to_binary(
                job, jobname, filename)
    except ImportError:
        tko_utils.dprint("WARNING: tko_pb2.py doesn't exist. Create by "
                         "compiling tko/tko.proto.")
305
306
def parse_one(db, jobname, path, parse_options):
    """Parse a single job. Optionally send email on failure.

    Side effects (unless dry_run): inserts/updates the job and its tests in
    the TKO database, may send failure mail, uploads perf values and Sponge
    results, may serialize the job to <path>/job.serialize, may shell out to
    dump/export a suite timeline report, and may write gs_offloader
    instructions into the results directory.

    @param db: database object.
    @param jobname: the tag used to search for existing job in db,
                    e.g. '1234-chromeos-test/host1'
    @param path: The path to the results to be parsed.
    @param parse_options: _ParseOptions instance.

    @raises: Re-raises any exception hit while writing to the TKO db.
    """
    reparse = parse_options.reparse
    mail_on_failure = parse_options.mail_on_failure
    dry_run = parse_options.dry_run
    suite_report = parse_options.suite_report
    datastore_creds = parse_options.datastore_creds
    export_to_gcloud_path = parse_options.export_to_gcloud_path

    tko_utils.dprint("\nScanning %s (%s)" % (jobname, path))
    old_job_idx = db.find_job(jobname)
    # Skip jobs that are already in the db, unless explicitly reparsing.
    if old_job_idx is not None and not reparse:
        tko_utils.dprint("! Job is already parsed, done")
        return

    # look up the status version
    job_keyval = models.job.read_keyval(path)
    status_version = job_keyval.get("status_version", 0)

    parser = parser_lib.parser(status_version)
    job = parser.make_job(path)
    tko_utils.dprint("+ Parsing dir=%s, jobname=%s" % (path, jobname))
    status_log_path = _find_status_log_path(path)
    if not status_log_path:
        tko_utils.dprint("! Unable to parse job, no status file")
        return
    _parse_status_log(parser, job, status_log_path)

    # On reparse, reuse the existing job row and drop db tests that no
    # longer appear in the freshly parsed results.
    if old_job_idx is not None:
        job.job_idx = old_job_idx
        unmatched_tests = _match_existing_tests(db, job)
        if not dry_run:
            _delete_tests_from_db(db, unmatched_tests)

    # Derive build/board/suite metadata from the job label, when parseable.
    job.build = None
    job.board = None
    job.build_version = None
    job.suite = None
    if job.label:
        label_info = site_utils.parse_job_name(job.label)
        if label_info:
            job.build = label_info.get('build', None)
            job.build_version = label_info.get('build_version', None)
            job.board = label_info.get('board', None)
            job.suite = label_info.get('suite', None)

    # Route result_tools logging through the parser's debug printer.
    result_utils_lib.LOG = tko_utils.dprint
    _throttle_result_size(path)

    # Record test result size to job_keyvals
    start_time = time.time()
    result_size_info = site_utils.collect_result_sizes(
            path, log=tko_utils.dprint)
    tko_utils.dprint('Finished collecting result sizes after %s seconds' %
                     (time.time()-start_time))
    job.keyval_dict.update(result_size_info.__dict__)

    # TODO(dshi): Update sizes with sponge_invocation.xml and throttle it.

    # check for failures
    message_lines = [""]
    job_successful = True
    for test in job.tests:
        if not test.subdir:
            continue
        tko_utils.dprint("* testname, subdir, status, reason: %s %s %s %s"
                         % (test.testname, test.subdir, test.status,
                            test.reason))
        if test.status != 'GOOD':
            job_successful = False
            message_lines.append(format_failure_message(
                jobname, test.kernel.base, test.subdir,
                test.status, test.reason))
    try:
        message = "\n".join(message_lines)

        if not dry_run:
            # send out a email report of failure
            if len(message) > 2 and mail_on_failure:
                tko_utils.dprint("Sending email report of failure on %s to %s"
                                 % (jobname, job.user))
                mailfailure(jobname, job, message)

            # Upload perf values to the perf dashboard, if applicable.
            for test in job.tests:
                perf_uploader.upload_test(job, test, jobname)

            # Upload job details to Sponge.
            sponge_url = sponge_utils.upload_results(job, log=tko_utils.dprint)
            if sponge_url:
                job.keyval_dict['sponge_url'] = sponge_url

            _write_job_to_db(db, jobname, job, job_keyval)

            # Verify the job data is written to the database.
            # NOTE(review): job.index is presumably set by _write_job_to_db's
            # insert_job call — confirm against tko/db.py.
            if job.tests:
                tests_in_db = db.find_tests(job.index)
                tests_in_db_count = len(tests_in_db) if tests_in_db else 0
                if tests_in_db_count != len(job.tests):
                    tko_utils.dprint(
                            'Failed to find enough tests for job_idx: %d. The '
                            'job should have %d tests, only found %d tests.' %
                            (job.index, len(job.tests), tests_in_db_count))
                    metrics.Counter(
                            'chromeos/autotest/result/db_save_failure',
                            description='The number of times parse failed to '
                            'save job to TKO database.').increment()

            # Although the cursor has autocommit, we still need to force it to
            # commit existing changes before we can use django models, otherwise
            # it will go into deadlock when django models try to start a new
            # trasaction while the current one has not finished yet.
            db.commit()

            # Handle retry job.
            orig_afe_job_id = job_keyval.get(constants.RETRY_ORIGINAL_JOB_ID,
                                             None)
            if orig_afe_job_id:
                orig_job_idx = tko_models.Job.objects.get(
                        afe_job_id=orig_afe_job_id).job_idx
                _invalidate_original_tests(orig_job_idx, job.index)
    except Exception as e:
        tko_utils.dprint("Hit exception while uploading to tko db:\n%s" %
                         traceback.format_exc())
        raise e

    # Serializing job into a binary file
    export_tko_to_file = global_config.global_config.get_config_value(
            'AUTOSERV', 'export_tko_job_to_file', type=bool, default=False)

    binary_file_name = os.path.join(path, "job.serialize")
    if export_tko_to_file:
        export_tko_job_to_file(job, jobname, binary_file_name)

    if not dry_run:
        db.commit()

    # Generate a suite report.
    # Check whether this is a suite job, a suite job will be a hostless job, its
    # jobname will be <JOB_ID>-<USERNAME>/hostless, the suite field will not be
    # NULL. Only generate timeline report when datastore_parent_key is given.
    try:
        datastore_parent_key = job_keyval.get('datastore_parent_key', None)
        provision_job_id = job_keyval.get('provision_job_id', None)
        if (suite_report and jobname.endswith('/hostless')
            and job.suite and datastore_parent_key):
            tko_utils.dprint('Start dumping suite timing report...')
            timing_log = os.path.join(path, 'suite_timing.log')
            dump_cmd = ("%s/site_utils/dump_suite_report.py %s "
                        "--output='%s' --debug" %
                        (common.autotest_dir, job.afe_job_id,
                         timing_log))

            if provision_job_id is not None:
                dump_cmd += " --provision_job_id=%d" % int(provision_job_id)

            subprocess.check_output(dump_cmd, shell=True)
            tko_utils.dprint('Successfully finish dumping suite timing report')

            if (datastore_creds and export_to_gcloud_path
                and os.path.exists(export_to_gcloud_path)):
                upload_cmd = [export_to_gcloud_path, datastore_creds,
                              timing_log, '--parent_key',
                              datastore_parent_key]
                tko_utils.dprint('Start exporting timeline report to gcloud')
                subprocess.check_output(upload_cmd)
                tko_utils.dprint('Successfully export timeline report to '
                                 'gcloud')
            else:
                tko_utils.dprint('DEBUG: skip exporting suite timeline to '
                                 'gcloud, because either gcloud creds or '
                                 'export_to_gcloud script is not found.')
    except Exception as e:
        # Suite-report generation is best-effort; never fail the parse.
        tko_utils.dprint("WARNING: fail to dump/export suite report. "
                         "Error:\n%s" % e)

    # Mark GS_OFFLOADER_NO_OFFLOAD in gs_offloader_instructions at the end of
    # the function, so any failure, e.g., db connection error, will stop
    # gs_offloader_instructions being updated, and logs can be uploaded for
    # troubleshooting.
    if job_successful:
        # Check if we should not offload this test's results.
        if job_keyval.get(constants.JOB_OFFLOAD_FAILURES_KEY, False):
            # Update the gs_offloader_instructions json file.
            gs_instructions_file = os.path.join(
                    path, constants.GS_OFFLOADER_INSTRUCTIONS)
            gs_offloader_instructions = {}
            if os.path.exists(gs_instructions_file):
                with open(gs_instructions_file, 'r') as f:
                    gs_offloader_instructions = json.load(f)

            gs_offloader_instructions[constants.GS_OFFLOADER_NO_OFFLOAD] = True
            with open(gs_instructions_file, 'w') as f:
                json.dump(gs_offloader_instructions, f)
508
509
def _write_job_to_db(db, jobname, job, job_keyval):
    """Write all TKO data associated with a job to DB.

    This updates the job object as a side effect.

    @param db: tko.db.db_sql object.
    @param jobname: Name of the job to write.
    @param job: tko.models.job object.
    @param job_keyval: Dict of the job's keyvals; PARENT_JOB_ID is read from
            it when inserting the job row.
    """
    db.insert_or_update_machine(job)
    db.insert_job(
            jobname, job,
            parent_job_id=job_keyval.get(constants.PARENT_JOB_ID, None))
    db.update_job_keyvals(job)
    for test in job.tests:
        db.insert_test(job, test)
526
527
Prathmesh Prabhu42a2bb42018-04-18 18:56:16 -0700528def _find_status_log_path(path):
529 if os.path.exists(os.path.join(path, "status.log")):
530 return os.path.join(path, "status.log")
531 if os.path.exists(os.path.join(path, "status")):
532 return os.path.join(path, "status")
533 return ""
534
535
Prathmesh Prabhue06c49b2018-04-18 19:01:23 -0700536def _parse_status_log(parser, job, status_log_path):
537 status_lines = open(status_log_path).readlines()
538 parser.start(job)
539 tests = parser.end(status_lines)
540
541 # parser.end can return the same object multiple times, so filter out dups
542 job.tests = []
543 already_added = set()
544 for test in tests:
545 if test not in already_added:
546 already_added.add(test)
547 job.tests.append(test)
548
549
Prathmesh Prabhuedac1ee2018-04-18 19:16:34 -0700550def _match_existing_tests(db, job):
551 """Find entries in the DB corresponding to the job's tests, update job.
552
553 @return: Any unmatched tests in the db.
554 """
555 old_job_idx = job.job_idx
556 raw_old_tests = db.select("test_idx,subdir,test", "tko_tests",
557 {"job_idx": old_job_idx})
558 if raw_old_tests:
559 old_tests = dict(((test, subdir), test_idx)
560 for test_idx, subdir, test in raw_old_tests)
561 else:
562 old_tests = {}
563
564 for test in job.tests:
565 test_idx = old_tests.pop((test.testname, test.subdir), None)
566 if test_idx is not None:
567 test.test_idx = test_idx
568 else:
569 tko_utils.dprint("! Reparse returned new test "
570 "testname=%r subdir=%r" %
571 (test.testname, test.subdir))
572 return old_tests
573
574
575def _delete_tests_from_db(db, tests):
576 for test_idx in tests.itervalues():
577 where = {'test_idx' : test_idx}
578 db.delete('tko_iteration_result', where)
579 db.delete('tko_iteration_perf_value', where)
580 db.delete('tko_iteration_attributes', where)
581 db.delete('tko_test_attributes', where)
582 db.delete('tko_test_labels_tests', {'test_id': test_idx})
583 db.delete('tko_tests', where)
584
585
jadmanski8e9c2572008-11-11 00:29:02 +0000586def _get_job_subdirs(path):
587 """
588 Returns a list of job subdirectories at path. Returns None if the test
589 is itself a job directory. Does not recurse into the subdirs.
590 """
591 # if there's a .machines file, use it to get the subdirs
jadmanski0afbb632008-06-06 21:10:57 +0000592 machine_list = os.path.join(path, ".machines")
593 if os.path.exists(machine_list):
jadmanski42fbd072009-01-30 15:07:05 +0000594 subdirs = set(line.strip() for line in file(machine_list))
595 existing_subdirs = set(subdir for subdir in subdirs
596 if os.path.exists(os.path.join(path, subdir)))
597 if len(existing_subdirs) != 0:
598 return existing_subdirs
jadmanski8e9c2572008-11-11 00:29:02 +0000599
600 # if this dir contains ONLY subdirectories, return them
601 contents = set(os.listdir(path))
602 contents.discard(".parse.lock")
603 subdirs = set(sub for sub in contents if
604 os.path.isdir(os.path.join(path, sub)))
605 if len(contents) == len(subdirs) != 0:
606 return subdirs
607
608 # this is a job directory, or something else we don't understand
609 return None
610
611
def parse_leaf_path(db, path, level, parse_options):
    """Parse a leaf path.

    @param db: database handle.
    @param path: The path to the results to be parsed.
    @param level: Integer, level of subdirectories to include in the job name.
    @param parse_options: _ParseOptions instance.

    @returns: The job name of the parsed job, e.g. '123-chromeos-test/host1'
    """
    # The job name is the last |level| components of the path, rejoined.
    jobname = "/".join(path.split("/")[-level:])
    try:
        db.run_with_retry(parse_one, db, jobname, path, parse_options)
    except Exception as e:
        # One bad job must not abort the whole parse run: log and move on.
        tko_utils.dprint("Error parsing leaf path: %s\nException:\n%s\n%s" %
                         (path, e, traceback.format_exc()))
    return jobname
mbligha48eeb22009-03-11 16:44:43 +0000630
631
def parse_path(db, path, level, parse_options):
    """Parse a path

    Recurses into multi-machine job directories; a directory is treated as a
    leaf job when _get_job_subdirs returns None.

    @param db: database handle.
    @param path: The path to the results to be parsed.
    @param level: Integer, level of subdirectories to include in the job name.
    @param parse_options: _ParseOptions instance.

    @returns: A set of job names of the parsed jobs.
              set(['123-chromeos-test/host1', '123-chromeos-test/host2'])
    """
    processed_jobs = set()
    job_subdirs = _get_job_subdirs(path)
    if job_subdirs is not None:
        # parse status.log in current directory, if it exists. multi-machine
        # synchronous server side tests record output in this directory. without
        # this check, we do not parse these results.
        if os.path.exists(os.path.join(path, 'status.log')):
            new_job = parse_leaf_path(db, path, level, parse_options)
            processed_jobs.add(new_job)
        # multi-machine job
        for subdir in job_subdirs:
            jobpath = os.path.join(path, subdir)
            # Each extra directory level becomes part of the job name.
            new_jobs = parse_path(db, jobpath, level + 1, parse_options)
            processed_jobs.update(new_jobs)
    else:
        # single machine job
        new_job = parse_leaf_path(db, path, level, parse_options)
        processed_jobs.add(new_job)
    return processed_jobs
662
663
def _detach_from_parent_process():
    """Allow reparenting the parse process away from caller.

    When monitor_db is run via upstart, restarting the job sends SIGTERM to
    the whole process group. This makes us immune from that.
    """
    # A process group leader has pid == pgid; anyone else starts a new
    # session to escape the caller's group.
    if os.getpgid(0) != os.getpid():
        os.setsid()
mblighbb7b8912006-10-08 03:59:02 +0000672
def main():
    """Main entrance: parse all job results under a directory into TKO.

    Parses command-line options, optionally detaches from the parent
    process group, then for each job directory acquires a ``.parse.lock``
    flock and parses the results into the database.  A pidfile records
    success (0) or failure (1) for external monitoring, and ts_mon metrics
    are flushed on every exit path.
    """
    start_time = datetime.datetime.now()
    # Record the processed jobs so that
    # we can send the duration of parsing to metadata db.
    processed_jobs = set()

    options, args = parse_args()

    if options.detach:
        _detach_from_parent_process()

    parse_options = _ParseOptions(options.reparse, options.mailit,
                                  options.dry_run, options.suite_report,
                                  options.datastore_creds,
                                  options.export_to_gcloud_path)
    results_dir = os.path.abspath(args[0])
    assert os.path.exists(results_dir)

    site_utils.SetupTsMonGlobalState('tko_parse', indirect=False,
                                     short_lived=True)

    pid_file_manager = pidfile.PidFileManager("parser", results_dir)

    if options.write_pidfile:
        pid_file_manager.open_file()

    try:
        # build up the list of job dirs to parse
        if options.singledir:
            jobs_list = [results_dir]
        else:
            jobs_list = [os.path.join(results_dir, subdir)
                         for subdir in os.listdir(results_dir)]

        # build up the database
        db = tko_db.db(autocommit=False, host=options.db_host,
                       user=options.db_user, password=options.db_pass,
                       database=options.db_name)

        # parse all the jobs
        for path in jobs_list:
            lockfile = open(os.path.join(path, ".parse.lock"), "w")
            flags = fcntl.LOCK_EX
            if options.noblock:
                flags |= fcntl.LOCK_NB
            try:
                fcntl.flock(lockfile, flags)
            except IOError as e:
                # Close the lockfile on every failure path (previously it
                # leaked when an unexpected IOError was re-raised).
                lockfile.close()
                # lock is not available and nonblock has been requested
                if e.errno == errno.EWOULDBLOCK:
                    continue
                raise  # something unexpected happened
            try:
                new_jobs = parse_path(db, path, options.level, parse_options)
                processed_jobs.update(new_jobs)

            finally:
                fcntl.flock(lockfile, fcntl.LOCK_UN)
                lockfile.close()

    except Exception:
        pid_file_manager.close_file(1)
        raise
    else:
        pid_file_manager.close_file(0)
    finally:
        metrics.Flush()
    # NOTE(review): duration is computed but not reported anywhere visible
    # in this file -- presumably leftover from removed metadata reporting.
    duration_secs = (datetime.datetime.now() - start_time).total_seconds()
mbligh71d340d2008-03-05 15:51:16 +0000744
mbligh532cb272007-11-26 18:54:20 +0000745
# Script entry point: run the parser only when invoked directly, not on import.
if __name__ == "__main__":
    main()