blob: ad19127709a672a1fd04e846548f0590ab24a5a4 [file] [log] [blame]
Mike Frysinger0e2cb7a2019-08-20 17:04:52 -04001#!/usr/bin/python2 -u
mblighc2514542008-02-19 15:54:26 +00002
Aviv Keshet687d2dc2016-10-20 15:41:16 -07003import collections
Aviv Keshet687d2dc2016-10-20 15:41:16 -07004import errno
5import fcntl
Simran Basi1e10e922015-04-16 15:09:56 -07006import json
Aviv Keshet687d2dc2016-10-20 15:41:16 -07007import optparse
8import os
9import socket
Shuqian Zhao31425d52016-12-07 09:35:03 -080010import subprocess
Aviv Keshet687d2dc2016-10-20 15:41:16 -070011import sys
Dan Shi11e35062017-11-03 10:09:05 -070012import time
Aviv Keshet687d2dc2016-10-20 15:41:16 -070013import traceback
mblighbb7b8912006-10-08 03:59:02 +000014
mbligh96cf0512008-04-17 15:25:38 +000015import common
Dan Shi4f8c0242017-07-07 15:34:49 -070016from autotest_lib.client.bin.result_tools import utils as result_utils
17from autotest_lib.client.bin.result_tools import utils_lib as result_utils_lib
18from autotest_lib.client.bin.result_tools import runner as result_runner
19from autotest_lib.client.common_lib import control_data
Benny Peakefeb775c2017-02-08 15:14:14 -080020from autotest_lib.client.common_lib import global_config
jadmanskidb4f9b52008-12-03 22:52:53 +000021from autotest_lib.client.common_lib import mail, pidfile
Fang Deng49822682014-10-21 16:29:22 -070022from autotest_lib.client.common_lib import utils
Fang Deng49822682014-10-21 16:29:22 -070023from autotest_lib.frontend import setup_django_environment
Fang Deng9ec66802014-04-28 19:04:33 +000024from autotest_lib.frontend.tko import models as tko_models
Shuqian Zhao19e62fb2017-01-09 10:10:14 -080025from autotest_lib.server import site_utils
Fang Deng49822682014-10-21 16:29:22 -070026from autotest_lib.server.cros.dynamic_suite import constants
Benny Peaked322d3d2017-02-08 15:39:28 -080027from autotest_lib.site_utils.sponge_lib import sponge_utils
Dennis Jeffreyf9bef6c2013-08-05 11:01:27 -070028from autotest_lib.tko import db as tko_db, utils as tko_utils
Luigi Semenzatoe7064812017-02-03 14:47:59 -080029from autotest_lib.tko import models, parser_lib
Dennis Jeffreyf9bef6c2013-08-05 11:01:27 -070030from autotest_lib.tko.perf_upload import perf_uploader
mbligh74fc0462007-11-05 20:24:17 +000031
Dan Shib0af6212017-07-17 14:40:02 -070032try:
33 from chromite.lib import metrics
34except ImportError:
35 metrics = utils.metrics_mock
36
37
# Bundle of per-run parsing options threaded down into parse_one().
_ParseOptions = collections.namedtuple(
    'ParseOptions', ['reparse', 'mail_on_failure', 'dry_run', 'suite_report',
                     'datastore_creds', 'export_to_gcloud_path'])

# Control file names probed, in this order, when looking for a per-job
# max_result_size_KB override (see _max_result_size_from_control()).
_HARDCODED_CONTROL_FILE_NAMES = (
    # client side test control, as saved in old Autotest paths.
    'control',
    # server side test control, as saved in old Autotest paths.
    'control.srv',
    # All control files, as saved in skylab.
    'control.from_control_name',
)
50
51
def parse_args():
    """Parse command line args.

    Exits with status 1 (after printing usage) when no results directory
    is given.

    @return: (options, args) tuple from optparse; args holds the results
             directories to parse.
    """
    # build up our options parser and parse sys.argv
    parser = optparse.OptionParser()
    parser.add_option("-m", help="Send mail for FAILED tests",
                      dest="mailit", action="store_true")
    parser.add_option("-r", help="Reparse the results of a job",
                      dest="reparse", action="store_true")
    parser.add_option("-o", help="Parse a single results directory",
                      dest="singledir", action="store_true")
    parser.add_option("-l", help=("Levels of subdirectories to include "
                                  "in the job name"),
                      type="int", dest="level", default=1)
    parser.add_option("-n", help="No blocking on an existing parse",
                      dest="noblock", action="store_true")
    parser.add_option("-s", help="Database server hostname",
                      dest="db_host", action="store")
    parser.add_option("-u", help="Database username", dest="db_user",
                      action="store")
    parser.add_option("-p", help="Database password", dest="db_pass",
                      action="store")
    parser.add_option("-d", help="Database name", dest="db_name",
                      action="store")
    parser.add_option("--dry-run", help="Do not actually commit any results.",
                      dest="dry_run", action="store_true", default=False)
    parser.add_option(
            "--detach", action="store_true",
            help="Detach parsing process from the caller process. Used by "
                 "monitor_db to safely restart without affecting parsing.",
            default=False)
    parser.add_option("--write-pidfile",
                      help="write pidfile (.parser_execute)",
                      dest="write_pidfile", action="store_true",
                      default=False)
    parser.add_option("--record-duration",
                      help="[DEPRECATED] Record timing to metadata db",
                      dest="record_duration", action="store_true",
                      default=False)
    parser.add_option("--suite-report",
                      help=("Allows parsing job to attempt to create a suite "
                            "timeline report, if it detects that the job being "
                            "parsed is a suite job."),
                      dest="suite_report", action="store_true",
                      default=False)
    parser.add_option("--datastore-creds",
                      help=("The path to gcloud datastore credentials file, "
                            "which will be used to upload suite timeline "
                            "report to gcloud. If not specified, the one "
                            "defined in shadow_config will be used."),
                      dest="datastore_creds", action="store", default=None)
    parser.add_option("--export-to-gcloud-path",
                      help=("The path to export_to_gcloud script. Please find "
                            "chromite path on your server. The script is under "
                            "chromite/bin/."),
                      dest="export_to_gcloud_path", action="store",
                      default=None)
    options, args = parser.parse_args()

    # we need a results directory
    if len(args) == 0:
        tko_utils.dprint("ERROR: at least one results directory must "
                         "be provided")
        parser.print_help()
        sys.exit(1)

    # Fall back to the shadow_config-defined gcloud credentials when the
    # flag was not given on the command line.
    if not options.datastore_creds:
        gcloud_creds = global_config.global_config.get_config_value(
            'GCLOUD', 'cidb_datastore_writer_creds', default=None)
        options.datastore_creds = (site_utils.get_creds_abspath(gcloud_creds)
                                   if gcloud_creds else None)

    # Locate the export_to_gcloud helper by probing well-known home
    # directories; the first existing path wins.
    if not options.export_to_gcloud_path:
        export_script = 'chromiumos/chromite/bin/export_to_gcloud'
        # If it is a lab server, the script is under ~chromeos-test/
        if os.path.exists(os.path.expanduser('~chromeos-test/%s' %
                                             export_script)):
            path = os.path.expanduser('~chromeos-test/%s' % export_script)
        # If it is a local workstation, it is probably under ~/
        elif os.path.exists(os.path.expanduser('~/%s' % export_script)):
            path = os.path.expanduser('~/%s' % export_script)
        # If it is not found anywhere, the default will be set to None.
        else:
            path = None
        options.export_to_gcloud_path = path

    # pass the options back
    return options, args
mbligh74fc0462007-11-05 20:24:17 +0000139
140
def format_failure_message(jobname, kernel, testname, status, reason):
    """Format failure message with the given information.

    @param jobname: String representing the job name.
    @param kernel: String representing the kernel.
    @param testname: String representing the test name.
    @param status: String representing the test status.
    @param reason: String representing the reason.

    @return: Failure message as a string.
    """
    # Fixed-width columns keep the per-test lines aligned in the email body.
    return "%-12s %-20s %-12s %-10s %s" % (
            jobname, kernel, testname, status, reason)
mblighb85e6b02006-10-08 17:20:56 +0000154
mblighbb7b8912006-10-08 03:59:02 +0000155
def mailfailure(jobname, job, message):
    """Send an email about the failure.

    @param jobname: String representing the job name.
    @param job: A job object.
    @param message: The message to mail.
    """
    # Header: blank line, intro, results URL, blank line, then an aligned
    # column header and a separator row built from the same formatter used
    # for the per-test failure lines.
    header_lines = [
        "",
        "The following tests FAILED for this job",
        "http://%s/results/%s" % (socket.gethostname(), jobname),
        "",
        format_failure_message("Job name", "Kernel", "Test name",
                               "FAIL/WARN", "Failure reason"),
        format_failure_message("=" * 8, "=" * 6, "=" * 8, "=" * 8, "=" * 14),
    ]
    message_header = "\n".join(header_lines)

    subject = "AUTOTEST: FAILED tests from job %s" % jobname
    mail.send("", job.user, "", subject, message_header + message)
mbligh006f2302007-09-13 20:46:46 +0000177
178
def _invalidate_original_tests(orig_job_idx, retry_job_idx):
    """Retry tests invalidates original tests.

    Whenever a retry job is complete, we want to invalidate the original
    job's test results, such that the consumers of the tko database
    (e.g. tko frontend, wmatrix) could figure out which results are the latest.

    When a retry job is parsed, we retrieve the original job's afe_job_id
    from the retry job's keyvals, which is then converted to tko job_idx and
    passed into this method as |orig_job_idx|.

    In this method, we are going to invalidate the rows in tko_tests that are
    associated with the original job by flipping their 'invalid' bit to True.
    In addition, in tko_tests, we also maintain a pointer from the retry results
    to the original results, so that later we can always know which rows in
    tko_tests are retries and which are the corresponding original results.
    This is done by setting the field 'invalidates_test_idx' of the tests
    associated with the retry job.

    For example, assume Job(job_idx=105) are retried by Job(job_idx=108), after
    this method is run, their tko_tests rows will look like:
    __________________________________________________________________________
    test_idx| job_idx | test            | ... | invalid | invalidates_test_idx
    10      | 105     | dummy_Fail.Error| ... | 1       | NULL
    11      | 105     | dummy_Fail.Fail | ... | 1       | NULL
    ...
    20      | 108     | dummy_Fail.Error| ... | 0       | 10
    21      | 108     | dummy_Fail.Fail | ... | 0       | 11
    __________________________________________________________________________
    Note the invalid bits of the rows for Job(job_idx=105) are set to '1'.
    And the 'invalidates_test_idx' fields of the rows for Job(job_idx=108)
    are set to 10 and 11 (the test_idx of the rows for the original job).

    @param orig_job_idx: An integer representing the original job's
                         tko job_idx. Tests associated with this job will
                         be marked as 'invalid'.
    @param retry_job_idx: An integer representing the retry job's
                          tko job_idx. The field 'invalidates_test_idx'
                          of the tests associated with this job will be updated.

    """
    msg = 'orig_job_idx: %s, retry_job_idx: %s' % (orig_job_idx, retry_job_idx)
    if not orig_job_idx or not retry_job_idx:
        tko_utils.dprint('ERROR: Could not invalidate tests: ' + msg)
        # Bail out early: the old code fell through here and ran the queries
        # below with a missing job_idx, silently touching nothing (or the
        # wrong rows) after having already logged the error.
        return
    # Using django models here makes things easier, but make sure that
    # before this method is called, all other relevant transactions have been
    # committed to avoid race condition. In the long run, we might consider
    # to make the rest of parser use django models.
    orig_tests = tko_models.Test.objects.filter(job__job_idx=orig_job_idx)
    retry_tests = tko_models.Test.objects.filter(job__job_idx=retry_job_idx)

    # Invalidate original tests.
    orig_tests.update(invalid=True)

    # Maintain a dictionary that maps (test, subdir) to original tests.
    # Note that within the scope of a job, (test, subdir) uniquelly
    # identifies a test run, but 'test' does not.
    # In a control file, one could run the same test with different
    # 'subdir_tag', for example,
    #   job.run_test('dummy_Fail', tag='Error', subdir_tag='subdir_1')
    #   job.run_test('dummy_Fail', tag='Error', subdir_tag='subdir_2')
    # In tko, we will get
    #   (test='dummy_Fail.Error', subdir='dummy_Fail.Error.subdir_1')
    #   (test='dummy_Fail.Error', subdir='dummy_Fail.Error.subdir_2')
    invalidated_tests = {(orig_test.test, orig_test.subdir): orig_test
                         for orig_test in orig_tests}
    for retry in retry_tests:
        # It is possible that (retry.test, retry.subdir) doesn't exist
        # in invalidated_tests. This could happen when the original job
        # didn't run some of its tests. For example, a dut goes offline
        # since the beginning of the job, in which case invalidated_tests
        # will only have one entry for 'SERVER_JOB'.
        orig_test = invalidated_tests.get((retry.test, retry.subdir), None)
        if orig_test:
            retry.invalidates_test = orig_test
            retry.save()
    tko_utils.dprint('DEBUG: Invalidated tests associated to job: ' + msg)
256
257
def _throttle_result_size(path):
    """Limit the total size of test results for the given path.

    Best-effort: failures to throttle are logged and ignored so parsing
    can continue.

    @param path: Path of the result directory.
    """
    if not result_runner.ENABLE_RESULT_THROTTLING:
        tko_utils.dprint(
                'Result throttling is not enabled. Skipping throttling %s' %
                path)
        return

    max_result_size_KB = _max_result_size_from_control(path)
    if max_result_size_KB is None:
        max_result_size_KB = control_data.DEFAULT_MAX_RESULT_SIZE_KB

    try:
        result_utils.execute(path, max_result_size_KB)
    except Exception:
        # Catch Exception (not a bare `except:`) so SystemExit and
        # KeyboardInterrupt still propagate; everything else is logged
        # and swallowed because throttling is best-effort.
        tko_utils.dprint(
                'Failed to throttle result size of %s.\nDetails %s' %
                (path, traceback.format_exc()))
279
280
def _max_result_size_from_control(path):
    """Gets the max result size set in a control file, if any.

    If not overrides is found, returns None.
    """
    # Probe each known control file name; the first one that parses and
    # carries a non-default max_result_size_KB wins.
    for name in _HARDCODED_CONTROL_FILE_NAMES:
        control_path = os.path.join(path, name)
        if not os.path.exists(control_path):
            continue

        try:
            parsed = control_data.parse_control(control_path,
                                                raise_warnings=False)
        except IOError as e:
            tko_utils.dprint(
                    'Failed to access %s. Error: %s\nDetails %s' %
                    (control_path, e, traceback.format_exc()))
            continue
        except control_data.ControlVariableException as e:
            tko_utils.dprint(
                    'Failed to parse %s. Error: %s\nDetails %s' %
                    (control_path, e, traceback.format_exc()))
            continue

        if parsed.max_result_size_KB != control_data.DEFAULT_MAX_RESULT_SIZE_KB:
            return parsed.max_result_size_KB
    return None
Dan Shi4f8c0242017-07-07 15:34:49 -0700305
306
def export_tko_job_to_file(job, jobname, filename):
    """Exports the tko job to disk file.

    @param job: database object.
    @param jobname: the job name as string.
    @param filename: The path to the results to be parsed.
    """
    # Import deferred to call time, matching the original code; the
    # serializer is only needed when export is requested.
    from autotest_lib.tko import job_serializer

    job_serializer.JobSerializer().serialize_to_binary(job, jobname, filename)
Michael Tangc89efa72017-08-03 14:27:10 -0700318
319
def parse_one(db, pid_file_manager, jobname, path, parse_options):
    """Parse a single job. Optionally send email on failure.

    Side effects: writes the job to the TKO DB (unless dry_run), may send
    mail, upload perf/Sponge data, serialize the job to disk, dump a suite
    timing report via subprocess, and write gs_offloader instructions.

    @param db: database object.
    @param pid_file_manager: pidfile.PidFileManager object.
    @param jobname: the tag used to search for existing job in db,
                    e.g. '1234-chromeos-test/host1'
    @param path: The path to the results to be parsed.
    @param parse_options: _ParseOptions instance.
    """
    reparse = parse_options.reparse
    mail_on_failure = parse_options.mail_on_failure
    dry_run = parse_options.dry_run
    suite_report = parse_options.suite_report
    datastore_creds = parse_options.datastore_creds
    export_to_gcloud_path = parse_options.export_to_gcloud_path

    tko_utils.dprint("\nScanning %s (%s)" % (jobname, path))
    old_job_idx = db.find_job(jobname)
    # Without --reparse, a job already in the DB is skipped entirely.
    if old_job_idx is not None and not reparse:
        tko_utils.dprint("! Job is already parsed, done")
        return

    # look up the status version
    job_keyval = models.job.read_keyval(path)
    status_version = job_keyval.get("status_version", 0)

    parser = parser_lib.parser(status_version)
    job = parser.make_job(path)
    tko_utils.dprint("+ Parsing dir=%s, jobname=%s" % (path, jobname))
    status_log_path = _find_status_log_path(path)
    if not status_log_path:
        tko_utils.dprint("! Unable to parse job, no status file")
        return
    _parse_status_log(parser, job, status_log_path)

    # Reparse case: reuse the existing job row and drop DB tests that no
    # longer appear in the freshly-parsed results.
    if old_job_idx is not None:
        job.job_idx = old_job_idx
        unmatched_tests = _match_existing_tests(db, job)
        if not dry_run:
            _delete_tests_from_db(db, unmatched_tests)

    job.afe_job_id = tko_utils.get_afe_job_id(jobname)
    job.skylab_task_id = tko_utils.get_skylab_task_id(jobname)
    # NOTE(review): both parent ids are read from the same PARENT_JOB_ID
    # keyval; presumably only the one matching the scheduler in use is
    # meaningful -- confirm against the consumers.
    job.afe_parent_job_id = job_keyval.get(constants.PARENT_JOB_ID)
    job.skylab_parent_task_id = job_keyval.get(constants.PARENT_JOB_ID)
    job.build = None
    job.board = None
    job.build_version = None
    job.suite = None
    if job.label:
        label_info = site_utils.parse_job_name(job.label)
        if label_info:
            job.build = label_info.get('build', None)
            job.build_version = label_info.get('build_version', None)
            job.board = label_info.get('board', None)
            job.suite = label_info.get('suite', None)

    # A 'suite' keyval, when present, overrides whatever was derived from
    # the job label above.
    if 'suite' in job.keyval_dict:
        job.suite = job.keyval_dict['suite']

    result_utils_lib.LOG = tko_utils.dprint
    _throttle_result_size(path)

    # Record test result size to job_keyvals
    start_time = time.time()
    result_size_info = site_utils.collect_result_sizes(
            path, log=tko_utils.dprint)
    tko_utils.dprint('Finished collecting result sizes after %s seconds' %
                     (time.time()-start_time))
    job.keyval_dict.update(result_size_info.__dict__)

    # TODO(dshi): Update sizes with sponge_invocation.xml and throttle it.

    # check for failures
    message_lines = [""]
    job_successful = True
    for test in job.tests:
        if not test.subdir:
            continue
        tko_utils.dprint("* testname, subdir, status, reason: %s %s %s %s"
                         % (test.testname, test.subdir, test.status,
                            test.reason))
        if test.status not in ('GOOD', 'WARN'):
            job_successful = False
            pid_file_manager.num_tests_failed += 1
            message_lines.append(format_failure_message(
                jobname, test.kernel.base, test.subdir,
                test.status, test.reason))

    message = "\n".join(message_lines)

    if not dry_run:
        # send out a email report of failure
        # message_lines starts with one empty string, so len(message) > 2
        # means at least one failure line was appended after the leading
        # newline.
        if len(message) > 2 and mail_on_failure:
            tko_utils.dprint("Sending email report of failure on %s to %s"
                             % (jobname, job.user))
            mailfailure(jobname, job, message)

        # Upload perf values to the perf dashboard, if applicable.
        for test in job.tests:
            perf_uploader.upload_test(job, test, jobname)

        # Upload job details to Sponge.
        sponge_url = sponge_utils.upload_results(job, log=tko_utils.dprint)
        if sponge_url:
            job.keyval_dict['sponge_url'] = sponge_url

        _write_job_to_db(db, jobname, job)

        # Verify the job data is written to the database.
        if job.tests:
            tests_in_db = db.find_tests(job.job_idx)
            tests_in_db_count = len(tests_in_db) if tests_in_db else 0
            if tests_in_db_count != len(job.tests):
                tko_utils.dprint(
                        'Failed to find enough tests for job_idx: %d. The '
                        'job should have %d tests, only found %d tests.' %
                        (job.job_idx, len(job.tests), tests_in_db_count))
                metrics.Counter(
                        'chromeos/autotest/result/db_save_failure',
                        description='The number of times parse failed to '
                        'save job to TKO database.').increment()

        # Although the cursor has autocommit, we still need to force it to
        # commit existing changes before we can use django models, otherwise
        # it will go into deadlock when django models try to start a new
        # trasaction while the current one has not finished yet.
        db.commit()

        # Handle retry job.
        orig_afe_job_id = job_keyval.get(constants.RETRY_ORIGINAL_JOB_ID,
                                         None)
        if orig_afe_job_id:
            orig_job_idx = tko_models.Job.objects.get(
                    afe_job_id=orig_afe_job_id).job_idx
            _invalidate_original_tests(orig_job_idx, job.job_idx)

    # Serializing job into a binary file
    export_tko_to_file = global_config.global_config.get_config_value(
            'AUTOSERV', 'export_tko_job_to_file', type=bool, default=False)

    binary_file_name = os.path.join(path, "job.serialize")
    if export_tko_to_file:
        export_tko_job_to_file(job, jobname, binary_file_name)

    if not dry_run:
        db.commit()

    # Generate a suite report.
    # Check whether this is a suite job, a suite job will be a hostless job, its
    # jobname will be <JOB_ID>-<USERNAME>/hostless, the suite field will not be
    # NULL. Only generate timeline report when datastore_parent_key is given.
    datastore_parent_key = job_keyval.get('datastore_parent_key', None)
    provision_job_id = job_keyval.get('provision_job_id', None)
    if (suite_report and jobname.endswith('/hostless')
        and job.suite and datastore_parent_key):
        tko_utils.dprint('Start dumping suite timing report...')
        timing_log = os.path.join(path, 'suite_timing.log')
        dump_cmd = ("%s/site_utils/dump_suite_report.py %s "
                    "--output='%s' --debug" %
                    (common.autotest_dir, job.afe_job_id,
                     timing_log))

        if provision_job_id is not None:
            dump_cmd += " --provision_job_id=%d" % int(provision_job_id)

        subprocess.check_output(dump_cmd, shell=True)
        tko_utils.dprint('Successfully finish dumping suite timing report')

        if (datastore_creds and export_to_gcloud_path
            and os.path.exists(export_to_gcloud_path)):
            upload_cmd = [export_to_gcloud_path, datastore_creds,
                          timing_log, '--parent_key',
                          datastore_parent_key]
            tko_utils.dprint('Start exporting timeline report to gcloud')
            subprocess.check_output(upload_cmd)
            tko_utils.dprint('Successfully export timeline report to '
                             'gcloud')
        else:
            tko_utils.dprint('DEBUG: skip exporting suite timeline to '
                             'gcloud, because either gcloud creds or '
                             'export_to_gcloud script is not found.')

    # Mark GS_OFFLOADER_NO_OFFLOAD in gs_offloader_instructions at the end of
    # the function, so any failure, e.g., db connection error, will stop
    # gs_offloader_instructions being updated, and logs can be uploaded for
    # troubleshooting.
    if job_successful:
        # Check if we should not offload this test's results.
        if job_keyval.get(constants.JOB_OFFLOAD_FAILURES_KEY, False):
            # Update the gs_offloader_instructions json file.
            gs_instructions_file = os.path.join(
                    path, constants.GS_OFFLOADER_INSTRUCTIONS)
            gs_offloader_instructions = {}
            if os.path.exists(gs_instructions_file):
                with open(gs_instructions_file, 'r') as f:
                    gs_offloader_instructions = json.load(f)

            gs_offloader_instructions[constants.GS_OFFLOADER_NO_OFFLOAD] = True
            with open(gs_instructions_file, 'w') as f:
                json.dump(gs_offloader_instructions, f)
522
523
def _write_job_to_db(db, jobname, job):
    """Write all TKO data associated with a job to DB.

    This updates the job object as a side effect.

    @param db: tko.db.db_sql object.
    @param jobname: Name of the job to write.
    @param job: tko.models.job object.
    """
    db.insert_or_update_machine(job)
    db.insert_job(jobname, job)
    # The task reference records which scheduler owns this job.
    task_engine = 'skylab' if tko_utils.is_skylab_task(jobname) else 'afe'
    db.insert_or_update_task_reference(job, task_engine)
    db.update_job_keyvals(job)
    for job_test in job.tests:
        db.insert_test(job, job_test)
542
543
Prathmesh Prabhu42a2bb42018-04-18 18:56:16 -0700544def _find_status_log_path(path):
545 if os.path.exists(os.path.join(path, "status.log")):
546 return os.path.join(path, "status.log")
547 if os.path.exists(os.path.join(path, "status")):
548 return os.path.join(path, "status")
549 return ""
550
551
Prathmesh Prabhue06c49b2018-04-18 19:01:23 -0700552def _parse_status_log(parser, job, status_log_path):
553 status_lines = open(status_log_path).readlines()
554 parser.start(job)
555 tests = parser.end(status_lines)
556
557 # parser.end can return the same object multiple times, so filter out dups
558 job.tests = []
559 already_added = set()
560 for test in tests:
561 if test not in already_added:
562 already_added.add(test)
563 job.tests.append(test)
564
565
Prathmesh Prabhuedac1ee2018-04-18 19:16:34 -0700566def _match_existing_tests(db, job):
567 """Find entries in the DB corresponding to the job's tests, update job.
568
569 @return: Any unmatched tests in the db.
570 """
571 old_job_idx = job.job_idx
572 raw_old_tests = db.select("test_idx,subdir,test", "tko_tests",
573 {"job_idx": old_job_idx})
574 if raw_old_tests:
575 old_tests = dict(((test, subdir), test_idx)
576 for test_idx, subdir, test in raw_old_tests)
577 else:
578 old_tests = {}
579
580 for test in job.tests:
581 test_idx = old_tests.pop((test.testname, test.subdir), None)
582 if test_idx is not None:
583 test.test_idx = test_idx
584 else:
585 tko_utils.dprint("! Reparse returned new test "
586 "testname=%r subdir=%r" %
587 (test.testname, test.subdir))
588 return old_tests
589
590
591def _delete_tests_from_db(db, tests):
592 for test_idx in tests.itervalues():
593 where = {'test_idx' : test_idx}
594 db.delete('tko_iteration_result', where)
595 db.delete('tko_iteration_perf_value', where)
596 db.delete('tko_iteration_attributes', where)
597 db.delete('tko_test_attributes', where)
598 db.delete('tko_test_labels_tests', {'test_id': test_idx})
599 db.delete('tko_tests', where)
600
601
jadmanski8e9c2572008-11-11 00:29:02 +0000602def _get_job_subdirs(path):
603 """
604 Returns a list of job subdirectories at path. Returns None if the test
605 is itself a job directory. Does not recurse into the subdirs.
606 """
607 # if there's a .machines file, use it to get the subdirs
jadmanski0afbb632008-06-06 21:10:57 +0000608 machine_list = os.path.join(path, ".machines")
609 if os.path.exists(machine_list):
jadmanski42fbd072009-01-30 15:07:05 +0000610 subdirs = set(line.strip() for line in file(machine_list))
611 existing_subdirs = set(subdir for subdir in subdirs
612 if os.path.exists(os.path.join(path, subdir)))
613 if len(existing_subdirs) != 0:
614 return existing_subdirs
jadmanski8e9c2572008-11-11 00:29:02 +0000615
616 # if this dir contains ONLY subdirectories, return them
617 contents = set(os.listdir(path))
618 contents.discard(".parse.lock")
619 subdirs = set(sub for sub in contents if
620 os.path.isdir(os.path.join(path, sub)))
621 if len(contents) == len(subdirs) != 0:
622 return subdirs
623
624 # this is a job directory, or something else we don't understand
625 return None
626
627
def parse_leaf_path(db, pid_file_manager, path, level, parse_options):
    """Parse a leaf path.

    @param db: database handle.
    @param pid_file_manager: pidfile.PidFileManager object.
    @param path: The path to the results to be parsed.
    @param level: Integer, level of subdirectories to include in the job name.
    @param parse_options: _ParseOptions instance.

    @returns: The job name of the parsed job, e.g. '123-chromeos-test/host1'
    """
    # The job name is the last `level` path components, e.g. level=2 maps
    # .../results/123-foo/host1 to '123-foo/host1'.
    jobname = "/".join(path.split("/")[-level:])
    db.run_with_retry(parse_one, db, pid_file_manager, jobname, path,
                      parse_options)
    return jobname
mbligha48eeb22009-03-11 16:44:43 +0000644
645
def parse_path(db, pid_file_manager, path, level, parse_options):
    """Parse a results path, recursing into multi-machine job directories.

    @param db: database handle.
    @param pid_file_manager: pidfile.PidFileManager object.
    @param path: The path to the results to be parsed.
    @param level: Integer, level of subdirectories to include in the job name.
    @param parse_options: _ParseOptions instance.

    @returns: A set of job names of the parsed jobs.
            set(['123-chromeos-test/host1', '123-chromeos-test/host2'])
    """
    processed_jobs = set()
    job_subdirs = _get_job_subdirs(path)
    if job_subdirs is None:
        # single machine job
        processed_jobs.add(parse_leaf_path(db, pid_file_manager, path, level,
                                           parse_options))
        return processed_jobs

    # parse status.log in current directory, if it exists. multi-machine
    # synchronous server side tests record output in this directory. without
    # this check, we do not parse these results.
    if os.path.exists(os.path.join(path, 'status.log')):
        processed_jobs.add(parse_leaf_path(db, pid_file_manager, path, level,
                                           parse_options))

    # multi-machine job: recurse into each machine's subdirectory, one
    # level deeper so job names keep the machine component.
    for subdir in job_subdirs:
        subdir_path = os.path.join(path, subdir)
        processed_jobs.update(parse_path(db, pid_file_manager, subdir_path,
                                         level + 1, parse_options))
    return processed_jobs
680
681
def _detach_from_parent_process():
    """Allow reparenting the parse process away from caller.

    When monitor_db is run via upstart, restarting the job sends SIGTERM to
    the whole process group. This makes us immune from that.
    """
    # Becoming a session leader detaches us from the caller's process
    # group; setsid() would fail if we already lead our own group.
    already_group_leader = (os.getpid() == os.getpgid(0))
    if not already_group_leader:
        os.setsid()
mblighbb7b8912006-10-08 03:59:02 +0000690
Aviv Keshet6469b532018-07-17 16:44:39 -0700691
def main():
    """tko_parse entry point."""
    options, args = parse_args()

    # Metrics are configured with indirect=False and flushed manually (rather
    # than through the SetupTsMonGlobalState context manager) because
    # tko/parse is expected to be a very short lived (<1 min) script when
    # working effectively: we can afford neither a) waiting up to a minute
    # for metrics to flush at the end, nor b) dropping metrics that were sent
    # within the last minute of execution.
    site_utils.SetupTsMonGlobalState('tko_parse', indirect=False,
                                     short_lived=True)
    run_counter = metrics.SuccessCounter('chromeos/autotest/tko_parse/runs')
    try:
        with run_counter:
            _main_with_options(options, args)
    finally:
        metrics.Flush()
708
709
def _main_with_options(options, args):
    """Entry point with options parsed and metrics already set up.

    Builds the list of job results directories, opens the TKO database,
    and parses each directory under an exclusive per-directory file lock.
    Overall success/failure is recorded through the parser pidfile.

    @param options: optparse options object as returned by parse_args().
    @param args: Positional command-line arguments; args[0] must be the
            results directory to parse.
    """
    # Record the processed jobs so that
    # we can send the duration of parsing to metadata db.
    processed_jobs = set()

    if options.detach:
        _detach_from_parent_process()

    parse_options = _ParseOptions(options.reparse, options.mailit,
                                  options.dry_run, options.suite_report,
                                  options.datastore_creds,
                                  options.export_to_gcloud_path)
    results_dir = os.path.abspath(args[0])
    assert os.path.exists(results_dir)

    pid_file_manager = pidfile.PidFileManager("parser", results_dir)

    if options.write_pidfile:
        pid_file_manager.open_file()

    try:
        # build up the list of job dirs to parse
        if options.singledir:
            jobs_list = [results_dir]
        else:
            jobs_list = [os.path.join(results_dir, subdir)
                         for subdir in os.listdir(results_dir)]

        # build up the database
        db = tko_db.db(autocommit=False, host=options.db_host,
                       user=options.db_user, password=options.db_pass,
                       database=options.db_name)

        # parse all the jobs
        for path in jobs_list:
            # An exclusive flock on .parse.lock keeps two parsers from
            # processing the same job directory concurrently.
            lockfile = open(os.path.join(path, ".parse.lock"), "w")
            flags = fcntl.LOCK_EX
            if options.noblock:
                flags |= fcntl.LOCK_NB
            try:
                fcntl.flock(lockfile, flags)
            except IOError, e:
                # lock is not available and nonblock has been requested
                if e.errno == errno.EWOULDBLOCK:
                    lockfile.close()
                    continue
                else:
                    raise # something unexpected happened
            try:
                new_jobs = parse_path(db, pid_file_manager, path, options.level,
                                      parse_options)
                processed_jobs.update(new_jobs)

            finally:
                # Always release the lock, even if parsing raised.
                fcntl.flock(lockfile, fcntl.LOCK_UN)
                lockfile.close()

    except Exception as e:
        # Record failure (exit status 1) in the pidfile before re-raising.
        pid_file_manager.close_file(1)
        raise
    else:
        pid_file_manager.close_file(0)
mbligh71d340d2008-03-05 15:51:16 +0000773
mbligh532cb272007-11-26 18:54:20 +0000774
# Standard script entry guard: run the parser only when executed directly.
if __name__ == "__main__":
    main()