Blame - site_utils/run_suite.py - platform/external/autotest

2012-02-14 14:18:01 -0800

[diff] [blame]

#!/usr/bin/python

#

# Use of this source code is governed by a BSD-style license that can be

5

# found in the LICENSE file.

6

7

"""Tool for running suites of tests and waiting for completion.

8

9

The desired test suite will be scheduled with autotest, and then

10

this tool will block until the job is complete, printing a summary

11

at the end. Error conditions result in exceptions.

12

13

This is intended for use only with Chrome OS test suits that leverage the

14

dynamic suite infrastructure in server/cros/dynamic_suite.py.

15

"""

16

beeps

2013-03-22 13:15:49 -0700

[diff] [blame]

17

import getpass, hashlib, logging, optparse, os, re, sys, time

Chris Masone

2012-09-06 16:00:07 -0700

[diff] [blame]

18

from datetime import datetime

19

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

20

import common

Chris Masone

2012-09-06 16:00:07 -0700

[diff] [blame]

21

Aviv Keshet

2013-03-21 16:38:31 -0700

[diff] [blame]

22

from autotest_lib.client.common_lib import global_config, error, utils, enum

Alex Miller

88762a8

2013-09-04 15:41:28 -0700

[diff] [blame]

23

from autotest_lib.client.common_lib import site_utils, priorities

Chris Masone

44e4d6c

2012-08-15 14:25:53 -0700

[diff] [blame]

24

from autotest_lib.server.cros.dynamic_suite import constants

Chris Masone

b493555

2012-08-14 12:05:54 -0700

[diff] [blame]

25

from autotest_lib.server.cros.dynamic_suite import frontend_wrappers

26

from autotest_lib.server.cros.dynamic_suite import job_status

J. Richard Barnette

e7b98bb

2013-08-21 16:34:16 -0700

[diff] [blame]

27

from autotest_lib.server.cros.dynamic_suite import tools

beeps

2013-03-22 13:15:49 -0700

[diff] [blame]

28

from autotest_lib.site_utils.graphite import stats

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

29

Chris Masone

1120cdf

2012-02-27 17:35:07 -0800

[diff] [blame]

30

CONFIG = global_config.global_config

31

Simran Basi

2012-12-07 16:37:09 -0800

[diff] [blame]

32

# Return code that will be sent back to autotest_rpc_server.py

Aviv Keshet

2013-03-21 16:38:31 -0700

[diff] [blame]

33

RETURN_CODES = enum.Enum('OK', 'ERROR', 'WARNING')

Simran Basi

2012-12-07 16:37:09 -0800

[diff] [blame]

34

Chris Masone

dfa0beba

2012-03-19 11:41:47 -0700

[diff] [blame]

35

Scott Zawalski

2012-07-02 18:45:07 -0400

[diff] [blame]

36

def setup_logging(logfile=None):

37

"""Setup basic logging with all logging info stripped.

38

39

Calls to logging will only show the message. No severity is logged.

40

41

@param logfile: If specified dump output to a file as well.

42

"""

Dan Shi

8a2dddd

2013-08-09 12:58:44 -0700

[diff] [blame]

43

# Remove all existing handlers. client/common_lib/logging_config adds

44

# a StreamHandler to logger when modules are imported, e.g.,

45

# autotest_lib.client.bin.utils. A new StreamHandler will be added here to

46

# log only messages, not severity.

47

logging.getLogger().handlers = []

48

Scott Zawalski

2012-07-02 18:45:07 -0400

[diff] [blame]

49

screen_handler = logging.StreamHandler()

50

screen_handler.setFormatter(logging.Formatter('%(message)s'))

51

logging.getLogger().addHandler(screen_handler)

52

logging.getLogger().setLevel(logging.INFO)

53

if logfile:

54

file_handler = logging.FileHandler(logfile)

55

file_handler.setLevel(logging.DEBUG)

56

logging.getLogger().addHandler(file_handler)

Chris Masone

dfa0beba

2012-03-19 11:41:47 -0700

[diff] [blame]

57

58

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

59

def parse_options():

Aviv Keshet

2013-03-21 16:38:31 -0700

[diff] [blame]

60

#pylint: disable-msg=C0111

Zdenek Behan

77290c3

2012-06-26 17:39:47 +0200

[diff] [blame]

61

usage = "usage: %prog [options]"

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

62

parser = optparse.OptionParser(usage=usage)

63

parser.add_option("-b", "--board", dest="board")

64

parser.add_option("-i", "--build", dest="build")

Chris Masone

359c0fd

2012-03-13 15:18:59 -0700

[diff] [blame]

65

# This should just be a boolean flag, but the autotest "proxy" code

66

# can't handle flags that don't take arguments.

Alex Miller

2012-10-03 12:56:02 -0700

[diff] [blame]

67

parser.add_option("-n", "--no_wait", dest="no_wait", default="False",

68

help='Must pass "True" or "False" if used.')

Alex Miller

86ed979

2013-11-05 07:54:51 -0800

[diff] [blame]

69

parser.add_option("-p", "--pool", dest="pool", default="")

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

70

parser.add_option("-s", "--suite_name", dest="name")

Chris Masone

8ac6671

2012-02-15 14:21:02 -0800

[diff] [blame]

71

parser.add_option("-t", "--timeout_min", dest="timeout_min", default=30)

72

parser.add_option("-d", "--delay_sec", dest="delay_sec", default=10)

Chris Masone

2012-04-11 11:36:48 -0700

[diff] [blame]

73

parser.add_option("-m", "--mock_job_id", dest="mock_job_id",

74

help="Skips running suite; creates report for given ID.")

Alex Miller

05a2fff

2012-09-10 10:14:34 -0700

[diff] [blame]

75

parser.add_option("-u", "--num", dest="num", type="int", default=None,

Chris Masone

8906ab1

2012-07-23 15:37:56 -0700

[diff] [blame]

76

help="Run on at most NUM machines.")

Alex Miller

f43d0eb

2012-10-01 13:43:13 -0700

[diff] [blame]

77

# Same boolean flag issue applies here.

Alex Miller

2012-10-03 12:56:02 -0700

[diff] [blame]

78

parser.add_option("-f", "--file_bugs", dest="file_bugs", default='False',

79

help='File bugs on test failures. Must pass "True" or '

80

'"False" if used.')

Dan Shi

a02181f

2013-01-29 14:03:32 -0800

[diff] [blame]

81

parser.add_option("-l", "--bypass_labstatus", dest="bypass_labstatus",

82

action="store_true", help='Bypass lab status check.')

Alex Miller

88762a8

2013-09-04 15:41:28 -0700

[diff] [blame]

83

# We allow either a number or a string for the priority. This way, if you

84

# know what you're doing, one can specify a custom priority level between

85

# other levels.

86

parser.add_option("-r", "--priority", dest="priority",

87

default=priorities.Priority.DEFAULT,

88

action="store", help="Priority of suite")

Aviv Keshet

7cd1231

2013-07-25 10:25:55 -0700

[diff] [blame]

89

parser.add_option("--suite_args", dest="suite_args",

90

default=None, action="store",

91

help="Argument string for suite control file.")

Alex Miller

f43d0eb

2012-10-01 13:43:13 -0700

[diff] [blame]

92

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

93

options, args = parser.parse_args()

94

return parser, options, args

95

96

97

def get_pretty_status(status):

Aviv Keshet

2013-03-21 16:38:31 -0700

[diff] [blame]

98

"""

99

Converts a status string into a pretty-for-printing string.

100

101

@param status: Status to convert.

102

103

@return: Returns pretty string.

GOOD -> [ PASSED ]

TEST_NA -> [ INFO ]

other -> [ FAILED ]

"""

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

108

if status == 'GOOD':

109

return '[ PASSED ]'

Chris Masone

8906ab1

2012-07-23 15:37:56 -0700

[diff] [blame]

110

elif status == 'TEST_NA':

111

return '[ INFO ]'

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

112

return '[ FAILED ]'

113

Zdenek Behan

2012-04-06 17:20:01 +0200

[diff] [blame]

114

def is_fail_status(status):

Aviv Keshet

2013-03-21 16:38:31 -0700

[diff] [blame]

115

"""

116

Check if the given status corresponds to a failure.

117

118

@param status: The status to check. (string)

119

120

@return: True if status is FAIL or ERROR. False otherwise.

121

"""

Zdenek Behan

2012-04-06 17:20:01 +0200

[diff] [blame]

122

# All the statuses tests can have when they fail.

beeps

2013-04-10 20:56:09 -0700

[diff] [blame]

123

if status in ['FAIL', 'ERROR', 'ABORT']:

Zdenek Behan

2012-04-06 17:20:01 +0200

[diff] [blame]

return True

return False

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

127

beeps

2013-04-10 20:56:09 -0700

[diff] [blame]

128

def get_view_info(suite_job_id, view, build, suite):

Zdenek Behan

2012-04-06 17:20:01 +0200

[diff] [blame]

129

"""

130

Parse a view for the slave job name and job_id.

131

132

@param suite_job_id: The job id of our master suite job.

133

@param view: Test result view.

beeps

2013-04-10 20:56:09 -0700

[diff] [blame]

134

@param build: build passed in via the -b option.

135

eg: lumpy-release/R28-3947.0.0

136

@param suite: suite passed in via the -s option.

137

eg: dummy

138

@return A tuple job_name, experimental, name of the slave test run

139

described by view. eg:

140

experimental_dummy_Pass fails: (1130-owner, True, dummy_Pass)

141

experimental_dummy_Pass aborts: (1130-owner, True,

142

experimental_dummy_Pass)

143

dummy_Fail: (1130-owner, False, dummy_Fail.Error)

Zdenek Behan

2012-04-06 17:20:01 +0200

[diff] [blame]

144

"""

145

# By default, we are the main suite job since there is no

146

# keyval entry for our job_name.

147

job_name = '%s-%s' % (suite_job_id, getpass.getuser())

148

experimental = False

beeps

2013-04-10 20:56:09 -0700

[diff] [blame]

149

test_name = ''

Dan Shi

d152180

2013-05-24 13:08:37 -0700

[diff] [blame]

150

# raw test name is the test_name from tko status view. tko_job_keyvals may

151

# have a record of the hash of this name mapping to job_id-owner, which can

152

# be used to reference the test to its job url. The change is made to

153

# support tests in different jobs within a suite that shares the same test

154

# class, e.g., AU suite.

155

raw_test_name = view['test_name']

Zdenek Behan

2012-04-06 17:20:01 +0200

[diff] [blame]

156

if 'job_keyvals' in view:

beeps

2013-04-10 20:56:09 -0700

[diff] [blame]

157

# For a test invocation like:

158

# NAME = "dummy_Fail"

159

# job.run_test('dummy_Fail', tag='Error', to_throw='TestError')

160

# we will:

161

# Record a keyval of the jobs test_name field: dummy_Fail

162

# On success, yield a tko status with the tagged name:

163

# dummy_Fail.Error

164

# On abort, yield a status (not from tko) with the job name:

165

# /build/suite/dummy_Fail.Error

166

# Note the last 2 options include the tag. The tag is seperated

167

# from the rest of the name with a '.'. The tag or test name can

168

# also include a /, and we must isolate the tag before we compare it

169

# to the hashed keyval. Based on this we have the following cases:

170

# 1. Regular test failure with or without a tag '.': std_job_name is

171

# set to the view test_name, after removing the tag.

172

# 2. Regular test Aborts: we know that dynamic_suite inserted a name

173

# like build/suite/test.name (eg:

174

# lumpy-release/R28-3947.0.0/dummy/dummy_Fail.Error), so we

175

# intersect the build/suite/ string we already have with the

176

# test_name in the view. The name of the aborted test is

177

# instrumental in generating the job_name, which is used in

178

# creating a link to the logs.

179

# 3. Experimental tests, Aborts and Failures: The test view

180

# corresponding to the afe_job_id of the suite job contains

181

# stubs for each test in this suite. The names of these jobs

182

# will contain an experimental prefix if they were aborted;

183

# If they failed the same names will not contain an experimental

184

# prefix but we would have hashed the name with a prefix. Eg:

185

# Test name = experimental_pass

186

# keyval contains: hash(experimental_pass)

187

# Fail/Pass view['test_name'] = pass

188

# Abort view['test_name'] = board/build/experimental_pass

189

# So we need to add the experimental prefix only if the test was

190

# aborted. Everything else is the same as [2].

191

# 4. Experimental server job failures: eg verify passes, something on

192

# the DUT crashes, the experimental server job fails to ssh in. We

193

# need to manually set the experimental flag in this case because the

194

# server job name isn't recorded in the keyvals. For a normal suite

195

# the views will contain: SERVER_JOB, try_new_image, test_name. i.e

196

# the test server jobs should be handled transparently and only the

197

# suite server job should appear in the view. If a server job fails

198

# (for an experimental test or otherwise) we insert the server job

199

# entry into the tko database instead. Put more generally we insert

200

# the last stage we knew about into the db record associated with

201

# that suites afe_job_id. This could lead to a view containing:

202

# SERVER_JOB, try_new_image,

203

# lumpy-release/R28-4008.0.0/bvt/experimental_pass_SERVER_JOB.

Alex Miller

9a1987a

2013-08-21 15:51:16 -0700

[diff] [blame]

204

# Neither of these operations will stomp on a pristine string.

205

test_name = view['test_name'].replace('%s/%s/'% (build, suite), '')

206

std_job_name = test_name.split('.')[0]

Chris Masone

2012-05-21 16:08:39 -0700

[diff] [blame]

207

beeps

2013-04-10 20:56:09 -0700

[diff] [blame]

208

if (job_status.view_is_for_infrastructure_fail(view) and

209

std_job_name.startswith(constants.EXPERIMENTAL_PREFIX)):

210

experimental = True

211

212

if std_job_name.startswith(constants.EXPERIMENTAL_PREFIX):

213

exp_job_name = std_job_name

214

else:

215

exp_job_name = constants.EXPERIMENTAL_PREFIX + std_job_name

Chris Masone

2012-08-29 10:37:08 -0700

[diff] [blame]

216

std_job_hash = hashlib.md5(std_job_name).hexdigest()

217

exp_job_hash = hashlib.md5(exp_job_name).hexdigest()

Dan Shi

d152180

2013-05-24 13:08:37 -0700

[diff] [blame]

218

raw_test_name_hash = hashlib.md5(raw_test_name).hexdigest()

Chris Masone

2012-05-21 16:08:39 -0700

[diff] [blame]

219

beeps

2013-04-10 20:56:09 -0700

[diff] [blame]

220

# In the experimental abort case both these clauses can evaluate

221

# to True.

Chris Masone

2012-05-21 16:08:39 -0700

[diff] [blame]

222

if std_job_hash in view['job_keyvals']:

223

job_name = view['job_keyvals'][std_job_hash]

beeps

2013-04-10 20:56:09 -0700

[diff] [blame]

224

if exp_job_hash in view['job_keyvals']:

Chris Masone

2012-05-21 16:08:39 -0700

[diff] [blame]

225

experimental = True

226

job_name = view['job_keyvals'][exp_job_hash]

Dan Shi

d152180

2013-05-24 13:08:37 -0700

[diff] [blame]

227

if raw_test_name_hash in view['job_keyvals']:

228

job_name = view['job_keyvals'][raw_test_name_hash]

Chris Masone

2012-05-21 16:08:39 -0700

[diff] [blame]

229

beeps

2013-04-10 20:56:09 -0700

[diff] [blame]

230

# If the name being returned is the test name it needs to include the tag

231

return job_name, experimental, std_job_name if not test_name else test_name

Zdenek Behan

2012-04-06 17:20:01 +0200

[diff] [blame]

232

233

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

234

class LogLink(object):

J. Richard Barnette

2013-08-23 11:24:21 -0700

[diff] [blame]

235

"""Information needed to record a link in the logs.

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

236

J. Richard Barnette

2013-08-23 11:24:21 -0700

[diff] [blame]

237

Depending on context and the information provided at

238

construction time, the link may point to either to log files for

239

a job, or to a bug filed for a failure in the job.

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

240

J. Richard Barnette

2013-08-23 11:24:21 -0700

[diff] [blame]

241

@var anchor The link text.

242

@var url The link url.

243

@var bug_id Id of a bug to link to, or None.

244

"""

245

246

_BUG_URL_PREFIX = CONFIG.get_config_value('BUG_REPORTING',

247

'tracker_url')

J. Richard Barnette

2013-08-23 11:24:21 -0700

[diff] [blame]

248

_URL_PATTERN = CONFIG.get_config_value('CROS',

249

'log_url_pattern', type=str)

250

251

Alex Miller

2013-10-30 15:18:57 -0700

[diff] [blame]

252

def __init__(self, anchor, server, job_string, bug_info=None):

J. Richard Barnette

2013-08-23 11:24:21 -0700

[diff] [blame]

253

"""Initialize the LogLink by generating the log URL.

254

255

@param anchor The link text.

Alex Miller

2013-10-30 15:18:57 -0700

[diff] [blame]

256

@param server The hostname of the server this suite ran on.

J. Richard Barnette

2013-08-23 11:24:21 -0700

[diff] [blame]

257

@param job_string The job whose logs we'd like to link to.

258

@param bug_info Info about the bug, if one was filed.

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

259

"""

260

self.anchor = anchor

Alex Miller

2013-10-30 15:18:57 -0700

[diff] [blame]

261

self.url = self._URL_PATTERN % (server, job_string)

J. Richard Barnette

2013-08-23 11:24:21 -0700

[diff] [blame]

262

if bug_info:

263

self.bug_id, self.bug_count = bug_info

264

else:

265

self.bug_id = None

266

self.bug_count = None

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

267

268

269

def GenerateBuildbotLink(self):

J. Richard Barnette

2013-08-23 11:24:21 -0700

[diff] [blame]

270

"""Generate a link formatted to meet buildbot expectations.

271

272

If there is a bug associated with this link, report that;

273

otherwise report a link to the job logs.

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

274

275

@return A link formatted for the buildbot log annotator.

276

"""

J. Richard Barnette

2013-08-23 11:24:21 -0700

[diff] [blame]

277

if self.bug_id:

278

url = '%s%s' % (self._BUG_URL_PREFIX, self.bug_id)

beeps

ad4daf8

2013-09-26 10:07:33 -0700

[diff] [blame]

279

if self.bug_count is None:

280

anchor_text = "%s (Unknown number of reports)" % (

281

self.anchor.strip())

282

elif self.bug_count == 1:

J. Richard Barnette

2013-08-23 11:24:21 -0700

[diff] [blame]

283

anchor_text = "%s (new)" % self.anchor.strip()

284

else:

285

anchor_text = "%s (%s reports)" % (

286

self.anchor.strip(), self.bug_count)

287

else:

288

url = self.url

289

anchor_text = self.anchor.strip()

290

return "@@@STEP_LINK@%s@%s@@@"% (anchor_text, url)

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

291

292

Craig Harrison

2012-08-31 10:29:33 -0700

[diff] [blame]

293

def GenerateTextLink(self):

J. Richard Barnette

2013-08-23 11:24:21 -0700

[diff] [blame]

294

"""Generate a link to the job's logs, for consumption by a human.

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

295

Craig Harrison

2012-08-31 10:29:33 -0700

[diff] [blame]

296

@return A link formatted for human readability.

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

297

"""

Craig Harrison

2012-08-31 10:29:33 -0700

[diff] [blame]

298

return "%s%s" % (self.anchor, self.url)

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

299

300

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

301

class Timings(object):

302

"""Timings for important events during a suite.

303

304

All timestamps are datetime.datetime objects.

305

306

@var suite_start_time: the time the suite started.

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

307

@var tests_start_time: the time the first test started running.

308

"""

beeps

2013-03-22 13:15:49 -0700

[diff] [blame]

309

310

# Recorded in create_suite_job as we're staging the components of a

311

# build on the devserver. Only the artifacts necessary to start

312

# installing images onto DUT's will be staged when we record

313

# payload_end_time, the remaining artifacts are downloaded after we kick

314

# off the reimaging job, at which point we record artifact_end_time.

Chris Masone

2012-05-01 16:52:31 -0700

[diff] [blame]

315

download_start_time = None

316

payload_end_time = None

317

artifact_end_time = None

beeps

2013-03-22 13:15:49 -0700

[diff] [blame]

318

319

# The test_start_time, but taken off the view that corresponds to the

320

# suite instead of an individual test.

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

321

suite_start_time = None

beeps

2013-03-22 13:15:49 -0700

[diff] [blame]

322

beeps

2013-03-22 13:15:49 -0700

[diff] [blame]

323

# Earliest and Latest tests in the set of TestViews passed to us.

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

324

tests_start_time = None

325

tests_end_time = None

326

327

beeps

2013-03-22 13:15:49 -0700

[diff] [blame]

328

def _GetDatetime(self, timing_string, timing_string_format):

329

"""

330

Formats the timing_string according to the timing_string_format.

331

332

@param timing_string: A datetime timing string.

333

@param timing_string_format: Format of the time in timing_string.

334

@return: A datetime object for the given timing string.

335

"""

336

try:

337

return datetime.strptime(timing_string, timing_string_format)

except TypeError:

return None

Chris Masone

2012-08-29 10:37:08 -0700

[diff] [blame]

342

def RecordTiming(self, view):

343

"""Given a test report view, extract and record pertinent time info.

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

344

345

get_detailed_test_views() returns a list of entries that provide

346

info about the various parts of a suite run. This method can take

347

any one of these entries and look up timestamp info we might want

348

and record it.

349

Chris Masone

2012-09-06 16:00:07 -0700

[diff] [blame]

350

If timestamps are unavailable, datetime.datetime.min/max will be used.

351

Chris Masone

2012-08-29 10:37:08 -0700

[diff] [blame]

352

@param view: a view dict, as returned by get_detailed_test_views().

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

353

"""

Chris Masone

2012-09-06 16:00:07 -0700

[diff] [blame]

354

start_candidate = datetime.min

355

end_candidate = datetime.max

356

if view['test_started_time']:

357

start_candidate = datetime.strptime(view['test_started_time'],

358

job_status.TIME_FMT)

359

if view['test_finished_time']:

360

end_candidate = datetime.strptime(view['test_finished_time'],

361

job_status.TIME_FMT)

362

Chris Masone

2012-08-29 10:37:08 -0700

[diff] [blame]

363

if job_status.view_is_for_suite_prep(view):

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

364

self.suite_start_time = start_candidate

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

365

else:

366

self._UpdateFirstTestStartTime(start_candidate)

367

self._UpdateLastTestEndTime(end_candidate)

Chris Masone

2012-08-29 10:37:08 -0700

[diff] [blame]

368

if 'job_keyvals' in view:

369

keyvals = view['job_keyvals']

beeps

2013-03-22 13:15:49 -0700

[diff] [blame]

370

self.download_start_time = self._GetDatetime(

371

keyvals.get(constants.DOWNLOAD_STARTED_TIME),

372

job_status.TIME_FMT)

373

374

self.payload_end_time = self._GetDatetime(

375

keyvals.get(constants.PAYLOAD_FINISHED_TIME),

376

job_status.TIME_FMT)

377

378

self.artifact_end_time = self._GetDatetime(

379

keyvals.get(constants.ARTIFACT_FINISHED_TIME),

380

job_status.TIME_FMT)

Chris Masone

44e4d6c

2012-08-15 14:25:53 -0700

[diff] [blame]

381

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

382

383

def _UpdateFirstTestStartTime(self, candidate):

384

"""Update self.tests_start_time, iff candidate is an earlier time.

385

386

@param candidate: a datetime.datetime object.

387

"""

388

if not self.tests_start_time or candidate < self.tests_start_time:

389

self.tests_start_time = candidate

390

391

392

def _UpdateLastTestEndTime(self, candidate):

393

"""Update self.tests_end_time, iff candidate is a later time.

394

395

@param candidate: a datetime.datetime object.

396

"""

397

if not self.tests_end_time or candidate > self.tests_end_time:

398

self.tests_end_time = candidate

def __str__(self):

return ('\n'

'Suite timings:\n'

Chris Masone

2012-05-01 16:52:31 -0700

[diff] [blame]

404

'Downloads started at %s\n'

405

'Payload downloads ended at %s\n'

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

406

'Suite started at %s\n'

Chris Masone

2012-05-01 16:52:31 -0700

[diff] [blame]

407

'Artifact downloads ended (at latest) at %s\n'

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

408

'Testing started at %s\n'

Chris Masone

2012-05-01 16:52:31 -0700

[diff] [blame]

409

'Testing ended at %s\n' % (self.download_start_time,

410

self.payload_end_time,

411

self.suite_start_time,

Chris Masone

2012-05-01 16:52:31 -0700

[diff] [blame]

412

self.artifact_end_time,

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

413

self.tests_start_time,

414

self.tests_end_time))

415

416

beeps

2013-03-22 13:15:49 -0700

[diff] [blame]

417

def _GetDataKeyForStatsd(self, suite, build, board):

418

"""

419

Constructs the key used for logging statsd timing data.

420

421

@param suite: scheduled suite that we want to record the results of.

422

@param build: The build string. This string should have a consistent

423

format eg: x86-mario-release/R26-3570.0.0. If the format of this

424

string changes such that we can't determine build_type or branch

425

we give up and use the parametes we're sure of instead (suite,

426

board). eg:

427

1. build = x86-alex-pgo-release/R26-3570.0.0

428

branch = 26

429

build_type = pgo-release

430

2. build = lumpy-paladin/R28-3993.0.0-rc5

431

branch = 28

432

build_type = paladin

433

@param board: The board that this suite ran on.

434

@return: The key used to log timing information in statsd.

435

"""

436

try:

Fang Deng

3197b39

2013-06-26 11:42:02 -0700

[diff] [blame]

437

_board, build_type, branch = site_utils.ParseBuildName(build)[:3]

438

except site_utils.ParseBuildNameException as e:

beeps

2013-03-22 13:15:49 -0700

[diff] [blame]

439

logging.error(str(e))

440

branch = 'Unknown'

441

build_type = 'Unknown'

442

else:

443

embedded_str = re.search(r'x86-\w+-(.*)', _board)

444

if embedded_str:

445

build_type = embedded_str.group(1) + '-' + build_type

data_key_dict = {

'board': board,

'branch': branch,

'build_type': build_type,

451

'suite': suite,

452

}

453

return ('run_suite.%(board)s.%(build_type)s.%(branch)s.%(suite)s'

% data_key_dict)

def SendResultsToStatsd(self, suite, build, board):

458

"""

459

Sends data to statsd.

460

461

1. Makes a data_key of the form: run_suite.$board.$branch.$suite

462

eg: stats/gauges/<hostname>/run_suite/<board>/<branch>/<suite>/

463

2. Computes timings for several start and end event pairs.

Alex Miller

9a1987a

2013-08-21 15:51:16 -0700

[diff] [blame]

464

3. Sends all timing values to statsd.

beeps

2013-03-22 13:15:49 -0700

[diff] [blame]

465

466

@param suite: scheduled suite that we want to record the results of.

467

@param build: the build that this suite ran on.

468

eg: 'lumpy-release/R26-3570.0.0'

469

@param board: the board that this suite ran on.

470

"""

471

if sys.version_info < (2, 7):

472

logging.error('Sending run_suite perf data to statsd requires'

473

'python 2.7 or greater.')

474

return

475

476

data_key = self._GetDataKeyForStatsd(suite, build, board)

477

478

# Since we don't want to try subtracting corrupted datetime values

479

# we catch TypeErrors in _GetDatetime and insert None instead. This

480

# means that even if, say, keyvals.get(constants.ARTIFACT_FINISHED_TIME)

481

# returns a corrupt value the member artifact_end_time is set to None.

482

if self.download_start_time:

483

if self.payload_end_time:

484

stats.Timer(data_key).send('payload_download_time',

485

(self.payload_end_time -

486

self.download_start_time).total_seconds())

487

488

if self.artifact_end_time:

489

stats.Timer(data_key).send('artifact_download_time',

490

(self.artifact_end_time -

491

self.download_start_time).total_seconds())

492

493

if self.tests_end_time:

494

if self.suite_start_time:

495

stats.Timer(data_key).send('suite_run_time',

496

(self.tests_end_time -

497

self.suite_start_time).total_seconds())

498

499

if self.tests_start_time:

500

stats.Timer(data_key).send('tests_run_time',

501

(self.tests_end_time -

502

self.tests_start_time).total_seconds())

503

beeps

2013-03-22 13:15:49 -0700

[diff] [blame]

504

beeps

2013-04-10 20:56:09 -0700

[diff] [blame]

505

def _full_test_name(job_id, view, build, suite):

506

"""

507

Generates the full test name for printing to logs and generating a link to

508

the results.

Craig Harrison

2012-08-31 10:29:33 -0700

[diff] [blame]

509

510

@param job_id: the job id.

511

@param view: the view for which we are generating the name.

beeps

2013-04-10 20:56:09 -0700

[diff] [blame]

512

@param build: the build for this invocation of run_suite.

513

@param suite: the suite for this invocation of run_suite.

Craig Harrison

2012-08-31 10:29:33 -0700

[diff] [blame]

514

@return The test name, possibly with a descriptive prefix appended.

515

"""

beeps

2013-04-10 20:56:09 -0700

[diff] [blame]

516

experimental, test_name = get_view_info(job_id, view, build, suite)[1:]

517

518

# If an experimental test is aborted get_view_info returns a name which

519

# includes the prefix.

520

prefix = constants.EXPERIMENTAL_PREFIX if (experimental and

521

not test_name.startswith(constants.EXPERIMENTAL_PREFIX)) else ''

522

return prefix + test_name

Craig Harrison

2012-08-31 10:29:33 -0700

[diff] [blame]

523

524

Alex Miller

2013-10-30 15:18:57 -0700

[diff] [blame]

525

_DEFAULT_AUTOTEST_INSTANCE = CONFIG.get_config_value(

526

'SERVER', 'hostname', type=str)

527

528

529

def instance_for_pool(pool_name):

530

"""

531

Return the hostname of the server that should be used to service a suite

532

for the specified pool.

533

534

@param pool_name: The pool (without 'pool:' to schedule the suite against.

535

@return: The correct host that should be used to service this suite run.

536

"""

537

return CONFIG.get_config_value(

538

'POOL_INSTANCE_SHARDING', pool_name,

539

default=_DEFAULT_AUTOTEST_INSTANCE)

540

541

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

542

def main():

Aviv Keshet

2013-03-21 16:38:31 -0700

[diff] [blame]

543

"""

544

Entry point for run_suite script.

545

"""

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

546

parser, options, args = parse_options()

Chris Masone

3a85064

2012-07-11 11:11:18 -0700

[diff] [blame]

547

log_name = 'run_suite-default.log'

Chris Masone

2012-04-11 11:36:48 -0700

[diff] [blame]

548

if not options.mock_job_id:

Zdenek Behan

77290c3

2012-06-26 17:39:47 +0200

[diff] [blame]

549

if args:

550

print 'Unknown arguments: ' + str(args)

551

parser.print_help()

552

return

553

if not options.build:

554

print 'Need to specify which build to use'

555

parser.print_help()

556

return

557

if not options.board:

558

print 'Need to specify board'

parser.print_help()

return

if not options.name:

print 'Need to specify suite name'

Chris Masone

2012-04-11 11:36:48 -0700

[diff] [blame]

563

parser.print_help()

564

return

Chris Masone

3a85064

2012-07-11 11:11:18 -0700

[diff] [blame]

565

# convert build name from containing / to containing only _

566

log_name = 'run_suite-%s.log' % options.build.replace('/', '_')

567

log_dir = os.path.join(common.autotest_dir, 'logs')

568

if os.path.exists(log_dir):

569

log_name = os.path.join(log_dir, log_name)

Alex Miller

8e75d0d

2012-07-31 15:13:32 -0700

[diff] [blame]

570

if options.num is not None and options.num < 1:

571

print 'Number of machines must be more than 0, if specified.'

572

parser.print_help()

573

return

Alex Miller

2012-10-03 12:56:02 -0700

[diff] [blame]

574

if options.no_wait != 'True' and options.no_wait != 'False':

575

print 'Please specify "True" or "False" for --no_wait.'

576

parser.print_help()

577

return

578

if options.file_bugs != 'True' and options.file_bugs != 'False':

579

print 'Please specify "True" or "False" for --file_bugs.'

580

parser.print_help()

581

return

Alex Miller

88762a8

2013-09-04 15:41:28 -0700

[diff] [blame]

582

583

try:

584

priority = int(options.priority)

585

except ValueError:

586

try:

587

priority = priorities.Priority.get_value(options.priority)

588

except AttributeError:

589

print 'Unknown priority level %s. Try one of %s.' % (

590

options.priority, ', '.join(priorities.Priority.names))

591

Scott Zawalski

2012-07-02 18:45:07 -0400

[diff] [blame]

592

setup_logging(logfile=log_name)

Chris Masone

dfa0beba

2012-03-19 11:41:47 -0700

[diff] [blame]

593

Simran Basi

2012-12-07 16:37:09 -0800

[diff] [blame]

594

try:

Dan Shi

a02181f

2013-01-29 14:03:32 -0800

[diff] [blame]

595

if not options.bypass_labstatus:

596

utils.check_lab_status(options.board)

Simran Basi

41bfae4

2013-01-09 10:50:47 -0800

[diff] [blame]

597

except (error.LabIsDownException, error.BoardIsDisabledException) as e:

598

logging.debug('Error Message: %s', e)

Simran Basi

2012-12-07 16:37:09 -0800

[diff] [blame]

599

print str(e)

Aviv Keshet

2013-03-21 16:38:31 -0700

[diff] [blame]

600

return RETURN_CODES.WARNING

Simran Basi

2012-12-07 16:37:09 -0800

[diff] [blame]

601

Alex Miller

2013-10-30 15:18:57 -0700

[diff] [blame]

602

instance_server = instance_for_pool(options.pool)

603

afe = frontend_wrappers.RetryingAFE(server=instance_server,

604

timeout_min=options.timeout_min,

Chris Masone

8ac6671

2012-02-15 14:21:02 -0800

[diff] [blame]

605

delay_sec=options.delay_sec)

Alex Miller

2013-10-30 15:18:57 -0700

[diff] [blame]

606

logging.info('Autotest instance: %s', instance_server)

Chris Masone

359c0fd

2012-03-13 15:18:59 -0700

[diff] [blame]

607

Alex Miller

2012-10-03 12:56:02 -0700

[diff] [blame]

608

wait = (options.no_wait == 'False')

609

file_bugs = (options.file_bugs == 'True')

Chris Masone

2012-04-11 11:36:48 -0700

[diff] [blame]

610

if options.mock_job_id:

611

job_id = int(options.mock_job_id)

612

else:

Alex Miller

8e75d0d

2012-07-31 15:13:32 -0700

[diff] [blame]

613

job_id = afe.run('create_suite_job', suite_name=options.name,

614

board=options.board, build=options.build,

Alex Miller

f43d0eb

2012-10-01 13:43:13 -0700

[diff] [blame]

615

check_hosts=wait, pool=options.pool, num=options.num,

Aviv Keshet

7cd1231

2013-07-25 10:25:55 -0700

[diff] [blame]

616

file_bugs=file_bugs, priority=priority,

617

suite_args=options.suite_args)

Alex Miller

2013-10-30 15:18:57 -0700

[diff] [blame]

618

TKO = frontend_wrappers.RetryingTKO(server=instance_server,

619

timeout_min=options.timeout_min,

Chris Masone

8ac6671

2012-02-15 14:21:02 -0800

[diff] [blame]

620

delay_sec=options.delay_sec)

Scott Zawalski

2012-07-02 18:45:07 -0400

[diff] [blame]

621

logging.info('Started suite job: %s', job_id)

Simran Basi

2012-12-07 16:37:09 -0800

[diff] [blame]

622

Aviv Keshet

2013-03-21 16:38:31 -0700

[diff] [blame]

623

code = RETURN_CODES.OK

J. Richard Barnette

712eb40

2013-08-13 18:03:00 -0700

[diff] [blame]

624

if wait:

625

while not afe.get_jobs(id=job_id, finished=True):

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

626

time.sleep(1)

J. Richard Barnette

712eb40

2013-08-13 18:03:00 -0700

[diff] [blame]

627

Scott Zawalski

2012-03-06 09:21:44 -0500

[diff] [blame]

628

views = TKO.run('get_detailed_test_views', afe_job_id=job_id)

Dan Shi

f327b91

2013-07-18 15:35:05 -0700

[diff] [blame]

629

# The intended behavior is to refrain from recording stats if the suite

630

# was aborted (either by a user or through the golo rpc). Since all the

631

# views associated with the afe_job_id of the suite contain the keyvals

632

# of the suite and not the individual tests themselves, we can achieve

633

# this without digging through the views.

634

is_aborted = any([view['job_keyvals'].get('aborted_by')

635

for view in views])

beeps

2013-04-10 20:56:09 -0700

[diff] [blame]

636

width = max((len(_full_test_name(job_id, view, options.build,

637

options.name)) for view in views)) + 3

Scott Zawalski

2012-03-06 09:21:44 -0500

[diff] [blame]

638

Chris Masone

2012-08-29 10:37:08 -0700

[diff] [blame]

639

relevant_views = filter(job_status.view_is_relevant, views)

Scott Zawalski

2012-03-06 09:21:44 -0500

[diff] [blame]

640

if not relevant_views:

Zdenek Behan

2012-04-06 17:20:01 +0200

[diff] [blame]

641

# The main suite job most likely failed in SERVER_JOB.

642

relevant_views = views

Scott Zawalski

2012-03-06 09:21:44 -0500

[diff] [blame]

643

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

644

timings = Timings()

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

645

web_links = []

646

buildbot_links = []

Chris Masone

2012-08-29 10:37:08 -0700

[diff] [blame]

647

for view in relevant_views:

648

timings.RecordTiming(view)

649

if job_status.view_is_for_suite_prep(view):

650

view['test_name'] = 'Suite prep'

Chris Masone

3a85064

2012-07-11 11:11:18 -0700

[diff] [blame]

651

beeps

2013-04-10 20:56:09 -0700

[diff] [blame]

652

job_name, experimental = get_view_info(job_id, view, options.build,

653

options.name)[:2]

654

test_view = _full_test_name(job_id, view, options.build,

655

options.name).ljust(width)

Chris Masone

2012-08-29 10:37:08 -0700

[diff] [blame]

656

logging.info("%s%s", test_view, get_pretty_status(view['status']))

beeps

8ead53c

2013-04-26 19:12:46 -0700

[diff] [blame]

657

J. Richard Barnette

e7b98bb

2013-08-21 16:34:16 -0700

[diff] [blame]

658

# It's important that we use the test name in the view

659

# and not the name returned by full_test_name, as this

660

# was the name inserted after the test ran, e.g. for an

661

# aborted test full_test_name will return

662

# 'experimental_testname' but the view and the bug_id

663

# keyval will use '/build/suite/experimental_testname'.

J. Richard Barnette

2013-08-23 11:24:21 -0700

[diff] [blame]

664

bug_info = tools.get_test_failure_bug_info(

J. Richard Barnette

e7b98bb

2013-08-21 16:34:16 -0700

[diff] [blame]

665

view['job_keyvals'], view['test_name'])

beeps

8ead53c

2013-04-26 19:12:46 -0700

[diff] [blame]

666

Alex Miller

2013-10-30 15:18:57 -0700

[diff] [blame]

667

link = LogLink(test_view, instance_server, job_name, bug_info)

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

668

web_links.append(link)

Chris Masone

2012-08-29 10:37:08 -0700

[diff] [blame]

669

J. Richard Barnette

02f815e

2013-08-22 16:28:32 -0700

[diff] [blame]

670

# Don't show links on the buildbot waterfall for tests with

671

# GOOD status.

Chris Masone

2012-08-29 10:37:08 -0700

[diff] [blame]

672

if view['status'] != 'GOOD':

673

logging.info("%s %s: %s", test_view, view['status'],

674

view['reason'])

Chris Masone

8906ab1

2012-07-23 15:37:56 -0700

[diff] [blame]

675

if view['status'] == 'TEST_NA':

676

# Didn't run; nothing to do here!

677

continue

J. Richard Barnette

02f815e

2013-08-22 16:28:32 -0700

[diff] [blame]

678

buildbot_links.append(link)

Aviv Keshet

2013-03-21 16:38:31 -0700

[diff] [blame]

679

if code == RETURN_CODES.ERROR:

Zdenek Behan

2012-04-06 17:20:01 +0200

[diff] [blame]

680

# Failed already, no need to worry further.

681

continue

beeps

2013-04-10 20:56:09 -0700

[diff] [blame]

682

683

# Any non experimental test that has a status other than WARN

684

# or GOOD will result in the tree closing. Experimental tests

685

# will not close the tree, even if they have been aborted.

beeps

b8117ff

2013-10-01 13:50:07 -0700

[diff] [blame]

686

if not experimental:

687

if view['status'] == 'WARN':

688

code = RETURN_CODES.WARNING

689

elif is_fail_status(view['status']):

690

code = RETURN_CODES.ERROR

Aviv Keshet

2013-03-21 16:38:31 -0700

[diff] [blame]

691

Dan Shi

f327b91

2013-07-18 15:35:05 -0700

[diff] [blame]

692

# Do not record stats for aborted suites.

693

if not is_aborted:

694

timings.SendResultsToStatsd(options.name, options.build,

695

options.board)

Scott Zawalski

2012-07-02 18:45:07 -0400

[diff] [blame]

696

logging.info(timings)

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

697

logging.info('\n'

698

'Links to test logs:')

699

for link in web_links:

Craig Harrison

2012-08-31 10:29:33 -0700

[diff] [blame]

700

logging.info(link.GenerateTextLink())

Aviv Keshet

2013-03-21 16:38:31 -0700

[diff] [blame]

701

702

try:

703

returnmessage = RETURN_CODES.get_string(code)

704

except ValueError:

705

returnmessage = 'UNKNOWN'

706

logging.info('\n'

707

'Will return from run_suite with status: %s',

708

returnmessage)

709

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

710

logging.info('\n'

711

'Output below this line is for buildbot consumption:')

712

for link in buildbot_links:

713

logging.info(link.GenerateBuildbotLink())

Chris Masone

d5939fe

2012-03-13 10:11:06 -0700

[diff] [blame]

714

else:

Scott Zawalski

2012-07-02 18:45:07 -0400

[diff] [blame]

715

logging.info('Created suite job: %r', job_id)

Alex Miller

a05498f

2013-11-01 16:16:21 -0700

[diff] [blame]

716

link = LogLink(options.name, instance_server,

717

'%s-%s' % (job_id, getpass.getuser()))

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

718

logging.info(link.GenerateBuildbotLink())

Scott Zawalski

2012-07-02 18:45:07 -0400

[diff] [blame]

719

logging.info('--no_wait specified; Exiting.')

Chris Masone