Blame - site_utils/run_suite.py - platform/external/autotest

2012-02-14 14:18:01 -0800

[diff] [blame]

#!/usr/bin/python

#

# Use of this source code is governed by a BSD-style license that can be

5

# found in the LICENSE file.

6

7

"""Tool for running suites of tests and waiting for completion.

8

9

The desired test suite will be scheduled with autotest, and then

10

this tool will block until the job is complete, printing a summary

11

at the end. Error conditions result in exceptions.

12

13

This is intended for use only with Chrome OS test suits that leverage the

14

dynamic suite infrastructure in server/cros/dynamic_suite.py.

15

"""

16

beeps

2013-03-22 13:15:49 -0700

[diff] [blame]

17

import getpass, hashlib, logging, optparse, os, re, sys, time

Chris Masone

2012-09-06 16:00:07 -0700

[diff] [blame]

18

from datetime import datetime

19

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

20

import common

Chris Masone

2012-09-06 16:00:07 -0700

[diff] [blame]

21

J. Richard Barnette

3cbd76b

2013-11-27 12:11:25 -0800

[diff] [blame]

22

from autotest_lib.client.common_lib import global_config, enum

23

from autotest_lib.client.common_lib import priorities

24

from autotest_lib.server import utils

Chris Masone

44e4d6c

2012-08-15 14:25:53 -0700

[diff] [blame]

25

from autotest_lib.server.cros.dynamic_suite import constants

Chris Masone

b493555

2012-08-14 12:05:54 -0700

[diff] [blame]

26

from autotest_lib.server.cros.dynamic_suite import frontend_wrappers

27

from autotest_lib.server.cros.dynamic_suite import job_status

J. Richard Barnette

e7b98bb

2013-08-21 16:34:16 -0700

[diff] [blame]

28

from autotest_lib.server.cros.dynamic_suite import tools

beeps

2013-03-22 13:15:49 -0700

[diff] [blame]

29

from autotest_lib.site_utils.graphite import stats

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

30

Chris Masone

1120cdf

2012-02-27 17:35:07 -0800

[diff] [blame]

31

CONFIG = global_config.global_config

32

Simran Basi

2012-12-07 16:37:09 -0800

[diff] [blame]

33

# Return code that will be sent back to autotest_rpc_server.py

Aviv Keshet

2013-03-21 16:38:31 -0700

[diff] [blame]

34

RETURN_CODES = enum.Enum('OK', 'ERROR', 'WARNING')

Simran Basi

2012-12-07 16:37:09 -0800

[diff] [blame]

35

Chris Masone

dfa0beba

2012-03-19 11:41:47 -0700

[diff] [blame]

36

Scott Zawalski

2012-07-02 18:45:07 -0400

[diff] [blame]

37

def setup_logging(logfile=None):

38

"""Setup basic logging with all logging info stripped.

39

40

Calls to logging will only show the message. No severity is logged.

41

42

@param logfile: If specified dump output to a file as well.

43

"""

Dan Shi

8a2dddd

2013-08-09 12:58:44 -0700

[diff] [blame]

44

# Remove all existing handlers. client/common_lib/logging_config adds

45

# a StreamHandler to logger when modules are imported, e.g.,

46

# autotest_lib.client.bin.utils. A new StreamHandler will be added here to

47

# log only messages, not severity.

48

logging.getLogger().handlers = []

49

Scott Zawalski

2012-07-02 18:45:07 -0400

[diff] [blame]

50

screen_handler = logging.StreamHandler()

51

screen_handler.setFormatter(logging.Formatter('%(message)s'))

52

logging.getLogger().addHandler(screen_handler)

53

logging.getLogger().setLevel(logging.INFO)

54

if logfile:

55

file_handler = logging.FileHandler(logfile)

56

file_handler.setLevel(logging.DEBUG)

57

logging.getLogger().addHandler(file_handler)

Chris Masone

dfa0beba

2012-03-19 11:41:47 -0700

[diff] [blame]

58

59

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

60

def parse_options():

Aviv Keshet

2013-03-21 16:38:31 -0700

[diff] [blame]

61

#pylint: disable-msg=C0111

Zdenek Behan

77290c3

2012-06-26 17:39:47 +0200

[diff] [blame]

62

usage = "usage: %prog [options]"

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

63

parser = optparse.OptionParser(usage=usage)

64

parser.add_option("-b", "--board", dest="board")

65

parser.add_option("-i", "--build", dest="build")

Chris Masone

359c0fd

2012-03-13 15:18:59 -0700

[diff] [blame]

66

# This should just be a boolean flag, but the autotest "proxy" code

67

# can't handle flags that don't take arguments.

Alex Miller

ab33ddb

2012-10-03 12:56:02 -0700

[diff] [blame]

68

parser.add_option("-n", "--no_wait", dest="no_wait", default="False",

69

help='Must pass "True" or "False" if used.')

Alex Miller

0032e93

2013-10-23 12:52:58 -0700

[diff] [blame]

70

# If you really want no pool, --pool="" will do it. USE WITH CARE.

71

parser.add_option("-p", "--pool", dest="pool", default="suites")

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

72

parser.add_option("-s", "--suite_name", dest="name")

Simran Basi

2013-11-26 13:02:11 -0800

[diff] [blame]

73

parser.add_option("-a", "--afe_timeout_mins", dest="afe_timeout_mins",

74

default=30)

75

parser.add_option("-t", "--timeout_mins", dest="timeout_mins",

76

default=1440)

Chris Masone

8ac6671

2012-02-15 14:21:02 -0800

[diff] [blame]

77

parser.add_option("-d", "--delay_sec", dest="delay_sec", default=10)

Chris Masone

2012-04-11 11:36:48 -0700

[diff] [blame]

78

parser.add_option("-m", "--mock_job_id", dest="mock_job_id",

79

help="Skips running suite; creates report for given ID.")

Alex Miller

05a2fff

2012-09-10 10:14:34 -0700

[diff] [blame]

80

parser.add_option("-u", "--num", dest="num", type="int", default=None,

Chris Masone

8906ab1

2012-07-23 15:37:56 -0700

[diff] [blame]

81

help="Run on at most NUM machines.")

Alex Miller

f43d0eb

2012-10-01 13:43:13 -0700

[diff] [blame]

82

# Same boolean flag issue applies here.

Alex Miller

ab33ddb

2012-10-03 12:56:02 -0700

[diff] [blame]

83

parser.add_option("-f", "--file_bugs", dest="file_bugs", default='False',

84

help='File bugs on test failures. Must pass "True" or '

85

'"False" if used.')

Dan Shi

a02181f

2013-01-29 14:03:32 -0800

[diff] [blame]

86

parser.add_option("-l", "--bypass_labstatus", dest="bypass_labstatus",

87

action="store_true", help='Bypass lab status check.')

Alex Miller

88762a8

2013-09-04 15:41:28 -0700

[diff] [blame]

88

# We allow either a number or a string for the priority. This way, if you

89

# know what you're doing, one can specify a custom priority level between

90

# other levels.

91

parser.add_option("-r", "--priority", dest="priority",

92

default=priorities.Priority.DEFAULT,

93

action="store", help="Priority of suite")

Aviv Keshet

7cd1231

2013-07-25 10:25:55 -0700

[diff] [blame]

94

parser.add_option("--suite_args", dest="suite_args",

95

default=None, action="store",

96

help="Argument string for suite control file.")

Alex Miller

f43d0eb

2012-10-01 13:43:13 -0700

[diff] [blame]

97

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

98

options, args = parser.parse_args()

99

return parser, options, args

100

101

102

def get_pretty_status(status):

Aviv Keshet

2013-03-21 16:38:31 -0700

[diff] [blame]

103

"""

104

Converts a status string into a pretty-for-printing string.

105

106

@param status: Status to convert.

107

108

@return: Returns pretty string.

GOOD -> [ PASSED ]

TEST_NA -> [ INFO ]

other -> [ FAILED ]

"""

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

113

if status == 'GOOD':

114

return '[ PASSED ]'

Chris Masone

8906ab1

2012-07-23 15:37:56 -0700

[diff] [blame]

115

elif status == 'TEST_NA':

116

return '[ INFO ]'

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

117

return '[ FAILED ]'

118

Zdenek Behan

2012-04-06 17:20:01 +0200

[diff] [blame]

119

def is_fail_status(status):

Aviv Keshet

2013-03-21 16:38:31 -0700

[diff] [blame]

120

"""

121

Check if the given status corresponds to a failure.

122

123

@param status: The status to check. (string)

124

125

@return: True if status is FAIL or ERROR. False otherwise.

126

"""

Zdenek Behan

2012-04-06 17:20:01 +0200

[diff] [blame]

127

# All the statuses tests can have when they fail.

beeps

2013-04-10 20:56:09 -0700

[diff] [blame]

128

if status in ['FAIL', 'ERROR', 'ABORT']:

Zdenek Behan

2012-04-06 17:20:01 +0200

[diff] [blame]

return True

return False

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

132

beeps

2013-04-10 20:56:09 -0700

[diff] [blame]

133

def get_view_info(suite_job_id, view, build, suite):

Zdenek Behan

2012-04-06 17:20:01 +0200

[diff] [blame]

134

"""

135

Parse a view for the slave job name and job_id.

136

137

@param suite_job_id: The job id of our master suite job.

138

@param view: Test result view.

beeps

2013-04-10 20:56:09 -0700

[diff] [blame]

139

@param build: build passed in via the -b option.

140

eg: lumpy-release/R28-3947.0.0

141

@param suite: suite passed in via the -s option.

142

eg: dummy

143

@return A tuple job_name, experimental, name of the slave test run

144

described by view. eg:

145

experimental_dummy_Pass fails: (1130-owner, True, dummy_Pass)

146

experimental_dummy_Pass aborts: (1130-owner, True,

147

experimental_dummy_Pass)

148

dummy_Fail: (1130-owner, False, dummy_Fail.Error)

Zdenek Behan

2012-04-06 17:20:01 +0200

[diff] [blame]

149

"""

150

# By default, we are the main suite job since there is no

151

# keyval entry for our job_name.

152

job_name = '%s-%s' % (suite_job_id, getpass.getuser())

153

experimental = False

beeps

2013-04-10 20:56:09 -0700

[diff] [blame]

154

test_name = ''

Dan Shi

d152180

2013-05-24 13:08:37 -0700

[diff] [blame]

155

# raw test name is the test_name from tko status view. tko_job_keyvals may

156

# have a record of the hash of this name mapping to job_id-owner, which can

157

# be used to reference the test to its job url. The change is made to

158

# support tests in different jobs within a suite that shares the same test

159

# class, e.g., AU suite.

160

raw_test_name = view['test_name']

Zdenek Behan

2012-04-06 17:20:01 +0200

[diff] [blame]

161

if 'job_keyvals' in view:

beeps

2013-04-10 20:56:09 -0700

[diff] [blame]

162

# For a test invocation like:

163

# NAME = "dummy_Fail"

164

# job.run_test('dummy_Fail', tag='Error', to_throw='TestError')

165

# we will:

166

# Record a keyval of the jobs test_name field: dummy_Fail

167

# On success, yield a tko status with the tagged name:

168

# dummy_Fail.Error

169

# On abort, yield a status (not from tko) with the job name:

170

# /build/suite/dummy_Fail.Error

171

# Note the last 2 options include the tag. The tag is seperated

172

# from the rest of the name with a '.'. The tag or test name can

173

# also include a /, and we must isolate the tag before we compare it

174

# to the hashed keyval. Based on this we have the following cases:

175

# 1. Regular test failure with or without a tag '.': std_job_name is

176

# set to the view test_name, after removing the tag.

177

# 2. Regular test Aborts: we know that dynamic_suite inserted a name

178

# like build/suite/test.name (eg:

179

# lumpy-release/R28-3947.0.0/dummy/dummy_Fail.Error), so we

180

# intersect the build/suite/ string we already have with the

181

# test_name in the view. The name of the aborted test is

182

# instrumental in generating the job_name, which is used in

183

# creating a link to the logs.

184

# 3. Experimental tests, Aborts and Failures: The test view

185

# corresponding to the afe_job_id of the suite job contains

186

# stubs for each test in this suite. The names of these jobs

187

# will contain an experimental prefix if they were aborted;

188

# If they failed the same names will not contain an experimental

189

# prefix but we would have hashed the name with a prefix. Eg:

190

# Test name = experimental_pass

191

# keyval contains: hash(experimental_pass)

192

# Fail/Pass view['test_name'] = pass

193

# Abort view['test_name'] = board/build/experimental_pass

194

# So we need to add the experimental prefix only if the test was

195

# aborted. Everything else is the same as [2].

196

# 4. Experimental server job failures: eg verify passes, something on

197

# the DUT crashes, the experimental server job fails to ssh in. We

198

# need to manually set the experimental flag in this case because the

199

# server job name isn't recorded in the keyvals. For a normal suite

200

# the views will contain: SERVER_JOB, try_new_image, test_name. i.e

201

# the test server jobs should be handled transparently and only the

202

# suite server job should appear in the view. If a server job fails

203

# (for an experimental test or otherwise) we insert the server job

204

# entry into the tko database instead. Put more generally we insert

205

# the last stage we knew about into the db record associated with

206

# that suites afe_job_id. This could lead to a view containing:

207

# SERVER_JOB, try_new_image,

208

# lumpy-release/R28-4008.0.0/bvt/experimental_pass_SERVER_JOB.

Alex Miller

9a1987a

2013-08-21 15:51:16 -0700

[diff] [blame]

209

# Neither of these operations will stomp on a pristine string.

Dan Shi

605f764

2013-11-04 16:32:54 -0800

[diff] [blame]

210

test_name = tools.get_test_name(build, suite, view['test_name'])

Alex Miller

9a1987a

2013-08-21 15:51:16 -0700

[diff] [blame]

211

std_job_name = test_name.split('.')[0]

Chris Masone

2012-05-21 16:08:39 -0700

[diff] [blame]

212

beeps

2013-04-10 20:56:09 -0700

[diff] [blame]

213

if (job_status.view_is_for_infrastructure_fail(view) and

214

std_job_name.startswith(constants.EXPERIMENTAL_PREFIX)):

215

experimental = True

216

217

if std_job_name.startswith(constants.EXPERIMENTAL_PREFIX):

218

exp_job_name = std_job_name

219

else:

220

exp_job_name = constants.EXPERIMENTAL_PREFIX + std_job_name

Chris Masone

2012-08-29 10:37:08 -0700

[diff] [blame]

221

std_job_hash = hashlib.md5(std_job_name).hexdigest()

222

exp_job_hash = hashlib.md5(exp_job_name).hexdigest()

Dan Shi

d152180

2013-05-24 13:08:37 -0700

[diff] [blame]

223

raw_test_name_hash = hashlib.md5(raw_test_name).hexdigest()

Chris Masone

2012-05-21 16:08:39 -0700

[diff] [blame]

224

beeps

2013-04-10 20:56:09 -0700

[diff] [blame]

225

# In the experimental abort case both these clauses can evaluate

226

# to True.

Chris Masone

2012-05-21 16:08:39 -0700

[diff] [blame]

227

if std_job_hash in view['job_keyvals']:

228

job_name = view['job_keyvals'][std_job_hash]

beeps

2013-04-10 20:56:09 -0700

[diff] [blame]

229

if exp_job_hash in view['job_keyvals']:

Chris Masone

2012-05-21 16:08:39 -0700

[diff] [blame]

230

experimental = True

231

job_name = view['job_keyvals'][exp_job_hash]

Dan Shi

d152180

2013-05-24 13:08:37 -0700

[diff] [blame]

232

if raw_test_name_hash in view['job_keyvals']:

233

job_name = view['job_keyvals'][raw_test_name_hash]

Chris Masone

2012-05-21 16:08:39 -0700

[diff] [blame]

234

beeps

2013-04-10 20:56:09 -0700

[diff] [blame]

235

# If the name being returned is the test name it needs to include the tag

236

return job_name, experimental, std_job_name if not test_name else test_name

Zdenek Behan

2012-04-06 17:20:01 +0200

[diff] [blame]

237

238

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

239

class LogLink(object):

J. Richard Barnette

2013-08-23 11:24:21 -0700

[diff] [blame]

240

"""Information needed to record a link in the logs.

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

241

J. Richard Barnette

2013-08-23 11:24:21 -0700

[diff] [blame]

242

Depending on context and the information provided at

243

construction time, the link may point to either to log files for

244

a job, or to a bug filed for a failure in the job.

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

245

J. Richard Barnette

2013-08-23 11:24:21 -0700

[diff] [blame]

246

@var anchor The link text.

247

@var url The link url.

248

@var bug_id Id of a bug to link to, or None.

249

"""

250

251

_BUG_URL_PREFIX = CONFIG.get_config_value('BUG_REPORTING',

252

'tracker_url')

J. Richard Barnette

2013-08-23 11:24:21 -0700

[diff] [blame]

253

_URL_PATTERN = CONFIG.get_config_value('CROS',

254

'log_url_pattern', type=str)

255

256

Fang Deng

2014-02-24 17:51:24 -0800

[diff] [blame^]

257

def __init__(self, anchor, server, job_string, bug_info=None, reason=None):

J. Richard Barnette

2013-08-23 11:24:21 -0700

[diff] [blame]

258

"""Initialize the LogLink by generating the log URL.

259

260

@param anchor The link text.

Alex Miller

2013-10-30 15:18:57 -0700

[diff] [blame]

261

@param server The hostname of the server this suite ran on.

J. Richard Barnette

2013-08-23 11:24:21 -0700

[diff] [blame]

262

@param job_string The job whose logs we'd like to link to.

263

@param bug_info Info about the bug, if one was filed.

Fang Deng

2014-02-24 17:51:24 -0800

[diff] [blame^]

264

@param reason A string representing the reason of failure if any.

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

265

"""

266

self.anchor = anchor

Alex Miller

2013-10-30 15:18:57 -0700

[diff] [blame]

267

self.url = self._URL_PATTERN % (server, job_string)

Fang Deng

2014-02-24 17:51:24 -0800

[diff] [blame^]

268

self.reason = reason

J. Richard Barnette

2013-08-23 11:24:21 -0700

[diff] [blame]

269

if bug_info:

270

self.bug_id, self.bug_count = bug_info

271

else:

272

self.bug_id = None

273

self.bug_count = None

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

274

275

276

def GenerateBuildbotLink(self):

J. Richard Barnette

2013-08-23 11:24:21 -0700

[diff] [blame]

277

"""Generate a link formatted to meet buildbot expectations.

278

279

If there is a bug associated with this link, report that;

280

otherwise report a link to the job logs.

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

281

282

@return A link formatted for the buildbot log annotator.

283

"""

J. Richard Barnette

2013-08-23 11:24:21 -0700

[diff] [blame]

284

if self.bug_id:

285

url = '%s%s' % (self._BUG_URL_PREFIX, self.bug_id)

beeps

ad4daf8

2013-09-26 10:07:33 -0700

[diff] [blame]

286

if self.bug_count is None:

Fang Deng

2014-02-24 17:51:24 -0800

[diff] [blame^]

287

anchor_text = '%s (Unknown number of reports)' % (

beeps

ad4daf8

2013-09-26 10:07:33 -0700

[diff] [blame]

288

self.anchor.strip())

289

elif self.bug_count == 1:

Fang Deng

2014-02-24 17:51:24 -0800

[diff] [blame^]

290

anchor_text = '%s (new)' % self.anchor.strip()

J. Richard Barnette

2013-08-23 11:24:21 -0700

[diff] [blame]

291

else:

Fang Deng

2014-02-24 17:51:24 -0800

[diff] [blame^]

292

anchor_text = '%s (%s reports)' % (

J. Richard Barnette

2013-08-23 11:24:21 -0700

[diff] [blame]

293

self.anchor.strip(), self.bug_count)

294

else:

295

url = self.url

296

anchor_text = self.anchor.strip()

Fang Deng

2014-02-24 17:51:24 -0800

[diff] [blame^]

297

298

if self.reason:

299

anchor_text = '%s - %s' % (anchor_text, self.reason)

300

301

return '@@@STEP_LINK@%s@%s@@@'% (anchor_text, url)

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

302

303

Craig Harrison

2012-08-31 10:29:33 -0700

[diff] [blame]

304

def GenerateTextLink(self):

J. Richard Barnette

2013-08-23 11:24:21 -0700

[diff] [blame]

305

"""Generate a link to the job's logs, for consumption by a human.

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

306

Craig Harrison

2012-08-31 10:29:33 -0700

[diff] [blame]

307

@return A link formatted for human readability.

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

308

"""

Fang Deng

2014-02-24 17:51:24 -0800

[diff] [blame^]

309

return '%s%s' % (self.anchor, self.url)

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

310

311

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

312

class Timings(object):

313

"""Timings for important events during a suite.

314

315

All timestamps are datetime.datetime objects.

316

317

@var suite_start_time: the time the suite started.

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

318

@var tests_start_time: the time the first test started running.

319

"""

beeps

2013-03-22 13:15:49 -0700

[diff] [blame]

320

321

# Recorded in create_suite_job as we're staging the components of a

322

# build on the devserver. Only the artifacts necessary to start

323

# installing images onto DUT's will be staged when we record

324

# payload_end_time, the remaining artifacts are downloaded after we kick

325

# off the reimaging job, at which point we record artifact_end_time.

Chris Masone

2012-05-01 16:52:31 -0700

[diff] [blame]

326

download_start_time = None

327

payload_end_time = None

328

artifact_end_time = None

beeps

2013-03-22 13:15:49 -0700

[diff] [blame]

329

330

# The test_start_time, but taken off the view that corresponds to the

331

# suite instead of an individual test.

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

332

suite_start_time = None

beeps

2013-03-22 13:15:49 -0700

[diff] [blame]

333

beeps

2013-03-22 13:15:49 -0700

[diff] [blame]

334

# Earliest and Latest tests in the set of TestViews passed to us.

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

335

tests_start_time = None

336

tests_end_time = None

337

338

beeps

2013-03-22 13:15:49 -0700

[diff] [blame]

339

def _GetDatetime(self, timing_string, timing_string_format):

340

"""

341

Formats the timing_string according to the timing_string_format.

342

343

@param timing_string: A datetime timing string.

344

@param timing_string_format: Format of the time in timing_string.

345

@return: A datetime object for the given timing string.

346

"""

347

try:

348

return datetime.strptime(timing_string, timing_string_format)

except TypeError:

return None

Chris Masone

2012-08-29 10:37:08 -0700

[diff] [blame]

353

def RecordTiming(self, view):

354

"""Given a test report view, extract and record pertinent time info.

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

355

356

get_detailed_test_views() returns a list of entries that provide

357

info about the various parts of a suite run. This method can take

358

any one of these entries and look up timestamp info we might want

359

and record it.

360

Chris Masone

2012-09-06 16:00:07 -0700

[diff] [blame]

361

If timestamps are unavailable, datetime.datetime.min/max will be used.

362

Chris Masone

2012-08-29 10:37:08 -0700

[diff] [blame]

363

@param view: a view dict, as returned by get_detailed_test_views().

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

364

"""

Chris Masone

2012-09-06 16:00:07 -0700

[diff] [blame]

365

start_candidate = datetime.min

366

end_candidate = datetime.max

367

if view['test_started_time']:

368

start_candidate = datetime.strptime(view['test_started_time'],

369

job_status.TIME_FMT)

370

if view['test_finished_time']:

371

end_candidate = datetime.strptime(view['test_finished_time'],

372

job_status.TIME_FMT)

373

Chris Masone

2012-08-29 10:37:08 -0700

[diff] [blame]

374

if job_status.view_is_for_suite_prep(view):

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

375

self.suite_start_time = start_candidate

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

376

else:

377

self._UpdateFirstTestStartTime(start_candidate)

378

self._UpdateLastTestEndTime(end_candidate)

Chris Masone

2012-08-29 10:37:08 -0700

[diff] [blame]

379

if 'job_keyvals' in view:

380

keyvals = view['job_keyvals']

beeps

2013-03-22 13:15:49 -0700

[diff] [blame]

381

self.download_start_time = self._GetDatetime(

382

keyvals.get(constants.DOWNLOAD_STARTED_TIME),

383

job_status.TIME_FMT)

384

385

self.payload_end_time = self._GetDatetime(

386

keyvals.get(constants.PAYLOAD_FINISHED_TIME),

387

job_status.TIME_FMT)

388

389

self.artifact_end_time = self._GetDatetime(

390

keyvals.get(constants.ARTIFACT_FINISHED_TIME),

391

job_status.TIME_FMT)

Chris Masone

44e4d6c

2012-08-15 14:25:53 -0700

[diff] [blame]

392

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

393

394

def _UpdateFirstTestStartTime(self, candidate):

395

"""Update self.tests_start_time, iff candidate is an earlier time.

396

397

@param candidate: a datetime.datetime object.

398

"""

399

if not self.tests_start_time or candidate < self.tests_start_time:

400

self.tests_start_time = candidate

401

402

403

def _UpdateLastTestEndTime(self, candidate):

404

"""Update self.tests_end_time, iff candidate is a later time.

405

406

@param candidate: a datetime.datetime object.

407

"""

408

if not self.tests_end_time or candidate > self.tests_end_time:

409

self.tests_end_time = candidate

def __str__(self):

return ('\n'

'Suite timings:\n'

Chris Masone

2012-05-01 16:52:31 -0700

[diff] [blame]

415

'Downloads started at %s\n'

416

'Payload downloads ended at %s\n'

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

417

'Suite started at %s\n'

Chris Masone

2012-05-01 16:52:31 -0700

[diff] [blame]

418

'Artifact downloads ended (at latest) at %s\n'

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

419

'Testing started at %s\n'

Chris Masone

2012-05-01 16:52:31 -0700

[diff] [blame]

420

'Testing ended at %s\n' % (self.download_start_time,

421

self.payload_end_time,

422

self.suite_start_time,

Chris Masone

2012-05-01 16:52:31 -0700

[diff] [blame]

423

self.artifact_end_time,

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

424

self.tests_start_time,

425

self.tests_end_time))

426

427

beeps

2013-03-22 13:15:49 -0700

[diff] [blame]

428

def _GetDataKeyForStatsd(self, suite, build, board):

429

"""

430

Constructs the key used for logging statsd timing data.

431

432

@param suite: scheduled suite that we want to record the results of.

433

@param build: The build string. This string should have a consistent

434

format eg: x86-mario-release/R26-3570.0.0. If the format of this

435

string changes such that we can't determine build_type or branch

436

we give up and use the parametes we're sure of instead (suite,

437

board). eg:

438

1. build = x86-alex-pgo-release/R26-3570.0.0

439

branch = 26

440

build_type = pgo-release

441

2. build = lumpy-paladin/R28-3993.0.0-rc5

442

branch = 28

443

build_type = paladin

444

@param board: The board that this suite ran on.

445

@return: The key used to log timing information in statsd.

446

"""

447

try:

J. Richard Barnette

3cbd76b

2013-11-27 12:11:25 -0800

[diff] [blame]

448

_board, build_type, branch = utils.ParseBuildName(build)[:3]

449

except utils.ParseBuildNameException as e:

beeps

2013-03-22 13:15:49 -0700

[diff] [blame]

450

logging.error(str(e))

451

branch = 'Unknown'

452

build_type = 'Unknown'

453

else:

454

embedded_str = re.search(r'x86-\w+-(.*)', _board)

455

if embedded_str:

456

build_type = embedded_str.group(1) + '-' + build_type

data_key_dict = {

'board': board,

'branch': branch,

'build_type': build_type,

462

'suite': suite,

463

}

464

return ('run_suite.%(board)s.%(build_type)s.%(branch)s.%(suite)s'

% data_key_dict)

def SendResultsToStatsd(self, suite, build, board):

469

"""

470

Sends data to statsd.

471

472

1. Makes a data_key of the form: run_suite.$board.$branch.$suite

473

eg: stats/gauges/<hostname>/run_suite/<board>/<branch>/<suite>/

474

2. Computes timings for several start and end event pairs.

Alex Miller

9a1987a

2013-08-21 15:51:16 -0700

[diff] [blame]

475

3. Sends all timing values to statsd.

beeps

2013-03-22 13:15:49 -0700

[diff] [blame]

476

477

@param suite: scheduled suite that we want to record the results of.

478

@param build: the build that this suite ran on.

479

eg: 'lumpy-release/R26-3570.0.0'

480

@param board: the board that this suite ran on.

481

"""

482

if sys.version_info < (2, 7):

483

logging.error('Sending run_suite perf data to statsd requires'

484

'python 2.7 or greater.')

485

return

486

487

data_key = self._GetDataKeyForStatsd(suite, build, board)

488

489

# Since we don't want to try subtracting corrupted datetime values

490

# we catch TypeErrors in _GetDatetime and insert None instead. This

491

# means that even if, say, keyvals.get(constants.ARTIFACT_FINISHED_TIME)

492

# returns a corrupt value the member artifact_end_time is set to None.

493

if self.download_start_time:

494

if self.payload_end_time:

495

stats.Timer(data_key).send('payload_download_time',

496

(self.payload_end_time -

497

self.download_start_time).total_seconds())

498

499

if self.artifact_end_time:

500

stats.Timer(data_key).send('artifact_download_time',

501

(self.artifact_end_time -

502

self.download_start_time).total_seconds())

503

504

if self.tests_end_time:

505

if self.suite_start_time:

506

stats.Timer(data_key).send('suite_run_time',

507

(self.tests_end_time -

508

self.suite_start_time).total_seconds())

509

510

if self.tests_start_time:

511

stats.Timer(data_key).send('tests_run_time',

512

(self.tests_end_time -

513

self.tests_start_time).total_seconds())

514

beeps

2013-03-22 13:15:49 -0700

[diff] [blame]

515

beeps

2013-04-10 20:56:09 -0700

[diff] [blame]

516

def _full_test_name(job_id, view, build, suite):

517

"""

518

Generates the full test name for printing to logs and generating a link to

519

the results.

Craig Harrison

2012-08-31 10:29:33 -0700

[diff] [blame]

520

521

@param job_id: the job id.

522

@param view: the view for which we are generating the name.

beeps

2013-04-10 20:56:09 -0700

[diff] [blame]

523

@param build: the build for this invocation of run_suite.

524

@param suite: the suite for this invocation of run_suite.

Craig Harrison

2012-08-31 10:29:33 -0700

[diff] [blame]

525

@return The test name, possibly with a descriptive prefix appended.

526

"""

beeps

2013-04-10 20:56:09 -0700

[diff] [blame]

527

experimental, test_name = get_view_info(job_id, view, build, suite)[1:]

528

529

# If an experimental test is aborted get_view_info returns a name which

530

# includes the prefix.

531

prefix = constants.EXPERIMENTAL_PREFIX if (experimental and

532

not test_name.startswith(constants.EXPERIMENTAL_PREFIX)) else ''

533

return prefix + test_name

Craig Harrison

2012-08-31 10:29:33 -0700

[diff] [blame]

534

535

Alex Miller

2013-10-30 15:18:57 -0700

[diff] [blame]

536

_DEFAULT_AUTOTEST_INSTANCE = CONFIG.get_config_value(

537

'SERVER', 'hostname', type=str)

538

539

540

def instance_for_pool(pool_name):

541

"""

542

Return the hostname of the server that should be used to service a suite

543

for the specified pool.

544

545

@param pool_name: The pool (without 'pool:' to schedule the suite against.

546

@return: The correct host that should be used to service this suite run.

547

"""

548

return CONFIG.get_config_value(

549

'POOL_INSTANCE_SHARDING', pool_name,

550

default=_DEFAULT_AUTOTEST_INSTANCE)

551

552

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

553

def main():

Aviv Keshet

2013-03-21 16:38:31 -0700

[diff] [blame]

554

"""

555

Entry point for run_suite script.

556

"""

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

557

parser, options, args = parse_options()

Chris Masone

3a85064

2012-07-11 11:11:18 -0700

[diff] [blame]

558

log_name = 'run_suite-default.log'

Chris Masone

2012-04-11 11:36:48 -0700

[diff] [blame]

559

if not options.mock_job_id:

Zdenek Behan

77290c3

2012-06-26 17:39:47 +0200

[diff] [blame]

560

if args:

561

print 'Unknown arguments: ' + str(args)

562

parser.print_help()

563

return

564

if not options.build:

565

print 'Need to specify which build to use'

566

parser.print_help()

567

return

568

if not options.board:

569

print 'Need to specify board'

parser.print_help()

return

if not options.name:

print 'Need to specify suite name'

Chris Masone

2012-04-11 11:36:48 -0700

[diff] [blame]

574

parser.print_help()

575

return

Chris Masone

3a85064

2012-07-11 11:11:18 -0700

[diff] [blame]

576

# convert build name from containing / to containing only _

577

log_name = 'run_suite-%s.log' % options.build.replace('/', '_')

578

log_dir = os.path.join(common.autotest_dir, 'logs')

579

if os.path.exists(log_dir):

580

log_name = os.path.join(log_dir, log_name)

Alex Miller

8e75d0d

2012-07-31 15:13:32 -0700

[diff] [blame]

581

if options.num is not None and options.num < 1:

582

print 'Number of machines must be more than 0, if specified.'

583

parser.print_help()

584

return

Alex Miller

ab33ddb

2012-10-03 12:56:02 -0700

[diff] [blame]

585

if options.no_wait != 'True' and options.no_wait != 'False':

586

print 'Please specify "True" or "False" for --no_wait.'

587

parser.print_help()

588

return

589

if options.file_bugs != 'True' and options.file_bugs != 'False':

590

print 'Please specify "True" or "False" for --file_bugs.'

591

parser.print_help()

592

return

Alex Miller

88762a8

2013-09-04 15:41:28 -0700

[diff] [blame]

593

594

try:

595

priority = int(options.priority)

596

except ValueError:

597

try:

598

priority = priorities.Priority.get_value(options.priority)

599

except AttributeError:

600

print 'Unknown priority level %s. Try one of %s.' % (

601

options.priority, ', '.join(priorities.Priority.names))

602

Scott Zawalski

2012-07-02 18:45:07 -0400

[diff] [blame]

603

setup_logging(logfile=log_name)

Chris Masone

dfa0beba

2012-03-19 11:41:47 -0700

[diff] [blame]

604

Simran Basi

2012-12-07 16:37:09 -0800

[diff] [blame]

605

try:

Dan Shi

a02181f

2013-01-29 14:03:32 -0800

[diff] [blame]

606

if not options.bypass_labstatus:

J. Richard Barnette

abbe096

2013-12-10 18:15:44 -0800

[diff] [blame]

607

utils.check_lab_status(options.build)

608

except utils.TestLabException as e:

Alex Miller

c3262f0

2013-11-15 11:27:19 -0800

[diff] [blame]

609

logging.warning('Error Message: %s', e)

Aviv Keshet

2013-03-21 16:38:31 -0700

[diff] [blame]

610

return RETURN_CODES.WARNING

Simran Basi

2012-12-07 16:37:09 -0800

[diff] [blame]

611

Alex Miller

2013-10-30 15:18:57 -0700

[diff] [blame]

612

instance_server = instance_for_pool(options.pool)

613

afe = frontend_wrappers.RetryingAFE(server=instance_server,

Simran Basi

2013-11-26 13:02:11 -0800

[diff] [blame]

614

timeout_min=options.afe_timeout_mins,

Chris Masone

8ac6671

2012-02-15 14:21:02 -0800

[diff] [blame]

615

delay_sec=options.delay_sec)

Alex Miller

2013-10-30 15:18:57 -0700

[diff] [blame]

616

logging.info('Autotest instance: %s', instance_server)

Chris Masone

359c0fd

2012-03-13 15:18:59 -0700

[diff] [blame]

617

Dan Shi

9512241

2013-11-12 16:20:33 -0800

[diff] [blame]

618

wait = options.no_wait == 'False'

619

file_bugs = options.file_bugs == 'True'

Chris Masone

2012-04-11 11:36:48 -0700

[diff] [blame]

620

if options.mock_job_id:

621

job_id = int(options.mock_job_id)

622

else:

Alex Miller

8e75d0d

2012-07-31 15:13:32 -0700

[diff] [blame]

623

job_id = afe.run('create_suite_job', suite_name=options.name,

624

board=options.board, build=options.build,

Alex Miller

f43d0eb

2012-10-01 13:43:13 -0700

[diff] [blame]

625

check_hosts=wait, pool=options.pool, num=options.num,

Aviv Keshet

7cd1231

2013-07-25 10:25:55 -0700

[diff] [blame]

626

file_bugs=file_bugs, priority=priority,

Dan Shi

9512241

2013-11-12 16:20:33 -0800

[diff] [blame]

627

suite_args=options.suite_args,

Simran Basi

2013-11-26 13:02:11 -0800

[diff] [blame]

628

wait_for_results=wait,

629

timeout_mins=options.timeout_mins)

Alex Miller

2013-10-30 15:18:57 -0700

[diff] [blame]

630

TKO = frontend_wrappers.RetryingTKO(server=instance_server,

Simran Basi

2013-11-26 13:02:11 -0800

[diff] [blame]

631

timeout_min=options.afe_timeout_mins,

Chris Masone

8ac6671

2012-02-15 14:21:02 -0800

[diff] [blame]

632

delay_sec=options.delay_sec)

Scott Zawalski

2012-07-02 18:45:07 -0400

[diff] [blame]

633

logging.info('Started suite job: %s', job_id)

Simran Basi

2012-12-07 16:37:09 -0800

[diff] [blame]

634

Aviv Keshet

2013-03-21 16:38:31 -0700

[diff] [blame]

635

code = RETURN_CODES.OK

J. Richard Barnette

712eb40

2013-08-13 18:03:00 -0700

[diff] [blame]

636

if wait:

637

while not afe.get_jobs(id=job_id, finished=True):

Alex Miller

764227d

2013-11-15 10:28:56 -0800

[diff] [blame]

638

time.sleep(10)

J. Richard Barnette

712eb40

2013-08-13 18:03:00 -0700

[diff] [blame]

639

Scott Zawalski

2012-03-06 09:21:44 -0500

[diff] [blame]

640

views = TKO.run('get_detailed_test_views', afe_job_id=job_id)

Dan Shi

f327b91

2013-07-18 15:35:05 -0700

[diff] [blame]

641

# The intended behavior is to refrain from recording stats if the suite

642

# was aborted (either by a user or through the golo rpc). Since all the

643

# views associated with the afe_job_id of the suite contain the keyvals

644

# of the suite and not the individual tests themselves, we can achieve

645

# this without digging through the views.

646

is_aborted = any([view['job_keyvals'].get('aborted_by')

647

for view in views])

Dan Shi

76af802

2013-10-19 01:59:49 -0700

[diff] [blame]

648

# For hostless job in Starting status, there is no test view associated.

649

# This can happen when a suite job in Starting status is aborted. When

650

# the scheduler hits some limit, e.g., max_hostless_jobs_per_drone,

651

# max_jobs_started_per_cycle, a suite job can stays in Starting status.

652

if not views:

653

code = RETURN_CODES.ERROR

654

returnmessage = RETURN_CODES.get_string(code)

655

logging.info('\nNo test view was found.\n'

656

'Will return from run_suite with status: %s',

returnmessage)

return code

beeps

2013-04-10 20:56:09 -0700

[diff] [blame]

660

width = max((len(_full_test_name(job_id, view, options.build,

Dan Shi

76af802

2013-10-19 01:59:49 -0700

[diff] [blame]

661

options.name)) for view in views)) + 3

Scott Zawalski

2012-03-06 09:21:44 -0500

[diff] [blame]

662

Chris Masone

2012-08-29 10:37:08 -0700

[diff] [blame]

663

relevant_views = filter(job_status.view_is_relevant, views)

Scott Zawalski

2012-03-06 09:21:44 -0500

[diff] [blame]

664

if not relevant_views:

Zdenek Behan

2012-04-06 17:20:01 +0200

[diff] [blame]

665

# The main suite job most likely failed in SERVER_JOB.

666

relevant_views = views

Scott Zawalski

2012-03-06 09:21:44 -0500

[diff] [blame]

667

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

668

timings = Timings()

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

669

web_links = []

670

buildbot_links = []

Chris Masone

2012-08-29 10:37:08 -0700

[diff] [blame]

671

for view in relevant_views:

672

timings.RecordTiming(view)

673

if job_status.view_is_for_suite_prep(view):

674

view['test_name'] = 'Suite prep'

Chris Masone

3a85064

2012-07-11 11:11:18 -0700

[diff] [blame]

675

beeps

2013-04-10 20:56:09 -0700

[diff] [blame]

676

job_name, experimental = get_view_info(job_id, view, options.build,

677

options.name)[:2]

678

test_view = _full_test_name(job_id, view, options.build,

679

options.name).ljust(width)

Chris Masone

2012-08-29 10:37:08 -0700

[diff] [blame]

680

logging.info("%s%s", test_view, get_pretty_status(view['status']))

beeps

8ead53c

2013-04-26 19:12:46 -0700

[diff] [blame]

681

J. Richard Barnette

e7b98bb

2013-08-21 16:34:16 -0700

[diff] [blame]

682

# It's important that we use the test name in the view

683

# and not the name returned by full_test_name, as this

684

# was the name inserted after the test ran, e.g. for an

685

# aborted test full_test_name will return

686

# 'experimental_testname' but the view and the bug_id

687

# keyval will use '/build/suite/experimental_testname'.

J. Richard Barnette

2013-08-23 11:24:21 -0700

[diff] [blame]

688

bug_info = tools.get_test_failure_bug_info(

J. Richard Barnette

e7b98bb

2013-08-21 16:34:16 -0700

[diff] [blame]

689

view['job_keyvals'], view['test_name'])

beeps

8ead53c

2013-04-26 19:12:46 -0700

[diff] [blame]

690

Fang Deng

2014-02-24 17:51:24 -0800

[diff] [blame^]

691

link = LogLink(test_view, instance_server, job_name,

692

bug_info)

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

693

web_links.append(link)

Chris Masone

2012-08-29 10:37:08 -0700

[diff] [blame]

694

J. Richard Barnette

02f815e

2013-08-22 16:28:32 -0700

[diff] [blame]

695

# Don't show links on the buildbot waterfall for tests with

696

# GOOD status.

Chris Masone

2012-08-29 10:37:08 -0700

[diff] [blame]

697

if view['status'] != 'GOOD':

698

logging.info("%s %s: %s", test_view, view['status'],

699

view['reason'])

Fang Deng

2014-02-24 17:51:24 -0800

[diff] [blame^]

700

link.reason = '%s: %s' % (view['status'], view['reason'])

Chris Masone

8906ab1

2012-07-23 15:37:56 -0700

[diff] [blame]

701

if view['status'] == 'TEST_NA':

702

# Didn't run; nothing to do here!

703

continue

J. Richard Barnette

02f815e

2013-08-22 16:28:32 -0700

[diff] [blame]

704

buildbot_links.append(link)

Aviv Keshet

2013-03-21 16:38:31 -0700

[diff] [blame]

705

if code == RETURN_CODES.ERROR:

Zdenek Behan

2012-04-06 17:20:01 +0200

[diff] [blame]

706

# Failed already, no need to worry further.

707

continue

beeps

2013-04-10 20:56:09 -0700

[diff] [blame]

708

709

# Any non experimental test that has a status other than WARN

710

# or GOOD will result in the tree closing. Experimental tests

711

# will not close the tree, even if they have been aborted.

beeps

b8117ff

2013-10-01 13:50:07 -0700

[diff] [blame]

712

if not experimental:

713

if view['status'] == 'WARN':

714

code = RETURN_CODES.WARNING

715

elif is_fail_status(view['status']):

716

code = RETURN_CODES.ERROR

Aviv Keshet

2013-03-21 16:38:31 -0700

[diff] [blame]

717

Dan Shi

f327b91

2013-07-18 15:35:05 -0700

[diff] [blame]

718

# Do not record stats for aborted suites.

Fang Deng

1cf6234

2013-11-22 10:06:32 -0800

[diff] [blame]

719

if not is_aborted and not options.mock_job_id:

Dan Shi

f327b91

2013-07-18 15:35:05 -0700

[diff] [blame]

720

timings.SendResultsToStatsd(options.name, options.build,

721

options.board)

Scott Zawalski

2012-07-02 18:45:07 -0400

[diff] [blame]

722

logging.info(timings)

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

723

logging.info('\n'

724

'Links to test logs:')

725

for link in web_links:

Craig Harrison

2012-08-31 10:29:33 -0700

[diff] [blame]

726

logging.info(link.GenerateTextLink())

Aviv Keshet

2013-03-21 16:38:31 -0700

[diff] [blame]

727

728

try:

729

returnmessage = RETURN_CODES.get_string(code)

730

except ValueError:

731

returnmessage = 'UNKNOWN'

732

logging.info('\n'

733

'Will return from run_suite with status: %s',

734

returnmessage)

735

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

736

logging.info('\n'

737

'Output below this line is for buildbot consumption:')

738

for link in buildbot_links:

739

logging.info(link.GenerateBuildbotLink())

Chris Masone

d5939fe

2012-03-13 10:11:06 -0700

[diff] [blame]

740

else:

Scott Zawalski

2012-07-02 18:45:07 -0400

[diff] [blame]

741

logging.info('Created suite job: %r', job_id)

Alex Miller

a05498f

2013-11-01 16:16:21 -0700

[diff] [blame]

742

link = LogLink(options.name, instance_server,

743

'%s-%s' % (job_id, getpass.getuser()))

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

744

logging.info(link.GenerateBuildbotLink())

Scott Zawalski

2012-07-02 18:45:07 -0400

[diff] [blame]

745

logging.info('--no_wait specified; Exiting.')

Chris Masone