Blame - site_utils/run_suite.py - platform/external/autotest

2012-02-14 14:18:01 -0800

[diff] [blame]

#!/usr/bin/python

#

# Use of this source code is governed by a BSD-style license that can be

5

# found in the LICENSE file.

6

7

"""Tool for running suites of tests and waiting for completion.

8

9

The desired test suite will be scheduled with autotest, and then

10

this tool will block until the job is complete, printing a summary

11

at the end. Error conditions result in exceptions.

12

13

This is intended for use only with Chrome OS test suits that leverage the

14

dynamic suite infrastructure in server/cros/dynamic_suite.py.

15

"""

16

beeps

2013-03-22 13:15:49 -0700

[diff] [blame]

17

import getpass, hashlib, logging, optparse, os, re, sys, time

Chris Masone

2012-09-06 16:00:07 -0700

[diff] [blame]

18

from datetime import datetime

19

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

20

import common

Chris Masone

2012-09-06 16:00:07 -0700

[diff] [blame]

21

Aviv Keshet

2013-03-21 16:38:31 -0700

[diff] [blame]

22

from autotest_lib.client.common_lib import global_config, error, utils, enum

Chris Masone

44e4d6c

2012-08-15 14:25:53 -0700

[diff] [blame]

23

from autotest_lib.server.cros.dynamic_suite import constants

Chris Masone

b493555

2012-08-14 12:05:54 -0700

[diff] [blame]

24

from autotest_lib.server.cros.dynamic_suite import frontend_wrappers

25

from autotest_lib.server.cros.dynamic_suite import job_status

Chris Masone

2012-08-29 10:37:08 -0700

[diff] [blame]

26

from autotest_lib.server.cros.dynamic_suite.reimager import Reimager

beeps

2013-03-22 13:15:49 -0700

[diff] [blame]

27

from autotest_lib.site_utils.suite_scheduler import base_event

28

from autotest_lib.site_utils.graphite import stats

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

29

Chris Masone

1120cdf

2012-02-27 17:35:07 -0800

[diff] [blame]

30

CONFIG = global_config.global_config

31

Simran Basi

2012-12-07 16:37:09 -0800

[diff] [blame]

32

# Return code that will be sent back to autotest_rpc_server.py

Aviv Keshet

2013-03-21 16:38:31 -0700

[diff] [blame]

33

RETURN_CODES = enum.Enum('OK', 'ERROR', 'WARNING')

Simran Basi

2012-12-07 16:37:09 -0800

[diff] [blame]

34

Chris Masone

dfa0beba

2012-03-19 11:41:47 -0700

[diff] [blame]

35

Scott Zawalski

2012-07-02 18:45:07 -0400

[diff] [blame]

36

def setup_logging(logfile=None):

37

"""Setup basic logging with all logging info stripped.

38

39

Calls to logging will only show the message. No severity is logged.

40

41

@param logfile: If specified dump output to a file as well.

42

"""

43

screen_handler = logging.StreamHandler()

44

screen_handler.setFormatter(logging.Formatter('%(message)s'))

45

logging.getLogger().addHandler(screen_handler)

46

logging.getLogger().setLevel(logging.INFO)

47

if logfile:

48

file_handler = logging.FileHandler(logfile)

49

file_handler.setLevel(logging.DEBUG)

50

logging.getLogger().addHandler(file_handler)

Chris Masone

dfa0beba

2012-03-19 11:41:47 -0700

[diff] [blame]

51

52

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

53

def parse_options():

Aviv Keshet

2013-03-21 16:38:31 -0700

[diff] [blame]

54

#pylint: disable-msg=C0111

Zdenek Behan

77290c3

2012-06-26 17:39:47 +0200

[diff] [blame]

55

usage = "usage: %prog [options]"

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

56

parser = optparse.OptionParser(usage=usage)

57

parser.add_option("-b", "--board", dest="board")

58

parser.add_option("-i", "--build", dest="build")

Chris Masone

359c0fd

2012-03-13 15:18:59 -0700

[diff] [blame]

59

# This should just be a boolean flag, but the autotest "proxy" code

60

# can't handle flags that don't take arguments.

Alex Miller

2012-10-03 12:56:02 -0700

[diff] [blame]

61

parser.add_option("-n", "--no_wait", dest="no_wait", default="False",

62

help='Must pass "True" or "False" if used.')

Scott Zawalski

6565017

2012-02-16 11:48:26 -0500

[diff] [blame]

63

parser.add_option("-p", "--pool", dest="pool", default=None)

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

64

parser.add_option("-s", "--suite_name", dest="name")

Chris Masone

8ac6671

2012-02-15 14:21:02 -0800

[diff] [blame]

65

parser.add_option("-t", "--timeout_min", dest="timeout_min", default=30)

66

parser.add_option("-d", "--delay_sec", dest="delay_sec", default=10)

Chris Masone

2012-04-11 11:36:48 -0700

[diff] [blame]

67

parser.add_option("-m", "--mock_job_id", dest="mock_job_id",

68

help="Skips running suite; creates report for given ID.")

Alex Miller

05a2fff

2012-09-10 10:14:34 -0700

[diff] [blame]

69

parser.add_option("-u", "--num", dest="num", type="int", default=None,

Chris Masone

8906ab1

2012-07-23 15:37:56 -0700

[diff] [blame]

70

help="Run on at most NUM machines.")

Alex Miller

f43d0eb

2012-10-01 13:43:13 -0700

[diff] [blame]

71

# Same boolean flag issue applies here.

Alex Miller

2012-10-03 12:56:02 -0700

[diff] [blame]

72

parser.add_option("-f", "--file_bugs", dest="file_bugs", default='False',

73

help='File bugs on test failures. Must pass "True" or '

74

'"False" if used.')

Dan Shi

a02181f

2013-01-29 14:03:32 -0800

[diff] [blame]

75

parser.add_option("-l", "--bypass_labstatus", dest="bypass_labstatus",

76

action="store_true", help='Bypass lab status check.')

Alex Miller

f43d0eb

2012-10-01 13:43:13 -0700

[diff] [blame]

77

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

78

options, args = parser.parse_args()

79

return parser, options, args

80

81

82

def get_pretty_status(status):

Aviv Keshet

2013-03-21 16:38:31 -0700

[diff] [blame]

83

"""

84

Converts a status string into a pretty-for-printing string.

85

86

@param status: Status to convert.

87

88

@return: Returns pretty string.

GOOD -> [ PASSED ]

TEST_NA -> [ INFO ]

other -> [ FAILED ]

"""

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

93

if status == 'GOOD':

94

return '[ PASSED ]'

Chris Masone

8906ab1

2012-07-23 15:37:56 -0700

[diff] [blame]

95

elif status == 'TEST_NA':

96

return '[ INFO ]'

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

97

return '[ FAILED ]'

98

Zdenek Behan

2012-04-06 17:20:01 +0200

[diff] [blame]

99

def is_fail_status(status):

Aviv Keshet

2013-03-21 16:38:31 -0700

[diff] [blame]

100

"""

101

Check if the given status corresponds to a failure.

102

103

@param status: The status to check. (string)

104

105

@return: True if status is FAIL or ERROR. False otherwise.

106

"""

Zdenek Behan

2012-04-06 17:20:01 +0200

[diff] [blame]

107

# All the statuses tests can have when they fail.

beeps

2013-04-10 20:56:09 -0700

[diff] [blame]

108

if status in ['FAIL', 'ERROR', 'ABORT']:

Zdenek Behan

2012-04-06 17:20:01 +0200

[diff] [blame]

return True

return False

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

112

beeps

2013-04-10 20:56:09 -0700

[diff] [blame]

113

def get_view_info(suite_job_id, view, build, suite):

Zdenek Behan

2012-04-06 17:20:01 +0200

[diff] [blame]

114

"""

115

Parse a view for the slave job name and job_id.

116

117

@param suite_job_id: The job id of our master suite job.

118

@param view: Test result view.

beeps

2013-04-10 20:56:09 -0700

[diff] [blame]

119

@param build: build passed in via the -b option.

120

eg: lumpy-release/R28-3947.0.0

121

@param suite: suite passed in via the -s option.

122

eg: dummy

123

@return A tuple job_name, experimental, name of the slave test run

124

described by view. eg:

125

experimental_dummy_Pass fails: (1130-owner, True, dummy_Pass)

126

experimental_dummy_Pass aborts: (1130-owner, True,

127

experimental_dummy_Pass)

128

dummy_Fail: (1130-owner, False, dummy_Fail.Error)

Zdenek Behan

2012-04-06 17:20:01 +0200

[diff] [blame]

129

"""

130

# By default, we are the main suite job since there is no

131

# keyval entry for our job_name.

132

job_name = '%s-%s' % (suite_job_id, getpass.getuser())

133

experimental = False

beeps

2013-04-10 20:56:09 -0700

[diff] [blame]

134

test_name = ''

Dan Shi

d152180

2013-05-24 13:08:37 -0700

[diff] [blame]

135

# raw test name is the test_name from tko status view. tko_job_keyvals may

136

# have a record of the hash of this name mapping to job_id-owner, which can

137

# be used to reference the test to its job url. The change is made to

138

# support tests in different jobs within a suite that shares the same test

139

# class, e.g., AU suite.

140

raw_test_name = view['test_name']

Zdenek Behan

2012-04-06 17:20:01 +0200

[diff] [blame]

141

if 'job_keyvals' in view:

beeps

2013-04-10 20:56:09 -0700

[diff] [blame]

142

# For a test invocation like:

143

# NAME = "dummy_Fail"

144

# job.run_test('dummy_Fail', tag='Error', to_throw='TestError')

145

# we will:

146

# Record a keyval of the jobs test_name field: dummy_Fail

147

# On success, yield a tko status with the tagged name:

148

# dummy_Fail.Error

149

# On abort, yield a status (not from tko) with the job name:

150

# /build/suite/dummy_Fail.Error

151

# Note the last 2 options include the tag. The tag is seperated

152

# from the rest of the name with a '.'. The tag or test name can

153

# also include a /, and we must isolate the tag before we compare it

154

# to the hashed keyval. Based on this we have the following cases:

155

# 1. Regular test failure with or without a tag '.': std_job_name is

156

# set to the view test_name, after removing the tag.

157

# 2. Regular test Aborts: we know that dynamic_suite inserted a name

158

# like build/suite/test.name (eg:

159

# lumpy-release/R28-3947.0.0/dummy/dummy_Fail.Error), so we

160

# intersect the build/suite/ string we already have with the

161

# test_name in the view. The name of the aborted test is

162

# instrumental in generating the job_name, which is used in

163

# creating a link to the logs.

164

# 3. Experimental tests, Aborts and Failures: The test view

165

# corresponding to the afe_job_id of the suite job contains

166

# stubs for each test in this suite. The names of these jobs

167

# will contain an experimental prefix if they were aborted;

168

# If they failed the same names will not contain an experimental

169

# prefix but we would have hashed the name with a prefix. Eg:

170

# Test name = experimental_pass

171

# keyval contains: hash(experimental_pass)

172

# Fail/Pass view['test_name'] = pass

173

# Abort view['test_name'] = board/build/experimental_pass

174

# So we need to add the experimental prefix only if the test was

175

# aborted. Everything else is the same as [2].

176

# 4. Experimental server job failures: eg verify passes, something on

177

# the DUT crashes, the experimental server job fails to ssh in. We

178

# need to manually set the experimental flag in this case because the

179

# server job name isn't recorded in the keyvals. For a normal suite

180

# the views will contain: SERVER_JOB, try_new_image, test_name. i.e

181

# the test server jobs should be handled transparently and only the

182

# suite server job should appear in the view. If a server job fails

183

# (for an experimental test or otherwise) we insert the server job

184

# entry into the tko database instead. Put more generally we insert

185

# the last stage we knew about into the db record associated with

186

# that suites afe_job_id. This could lead to a view containing:

187

# SERVER_JOB, try_new_image,

188

# lumpy-release/R28-4008.0.0/bvt/experimental_pass_SERVER_JOB.

Chris Masone

2012-08-29 10:37:08 -0700

[diff] [blame]

189

if view['test_name'].startswith(Reimager.JOB_NAME):

190

std_job_name = Reimager.JOB_NAME

Chris Masone

2012-08-29 10:37:08 -0700

[diff] [blame]

191

else:

beeps

2013-04-10 20:56:09 -0700

[diff] [blame]

192

# Neither of these operations will stomp on a pristine string.

193

test_name = view['test_name'].replace('%s/%s/'% (build, suite), '')

194

std_job_name = test_name.split('.')[0]

Chris Masone

2012-05-21 16:08:39 -0700

[diff] [blame]

195

beeps

2013-04-10 20:56:09 -0700

[diff] [blame]

196

if (job_status.view_is_for_infrastructure_fail(view) and

197

std_job_name.startswith(constants.EXPERIMENTAL_PREFIX)):

198

experimental = True

199

200

if std_job_name.startswith(constants.EXPERIMENTAL_PREFIX):

201

exp_job_name = std_job_name

202

else:

203

exp_job_name = constants.EXPERIMENTAL_PREFIX + std_job_name

Chris Masone

2012-08-29 10:37:08 -0700

[diff] [blame]

204

std_job_hash = hashlib.md5(std_job_name).hexdigest()

205

exp_job_hash = hashlib.md5(exp_job_name).hexdigest()

Dan Shi

d152180

2013-05-24 13:08:37 -0700

[diff] [blame]

206

raw_test_name_hash = hashlib.md5(raw_test_name).hexdigest()

Chris Masone

2012-05-21 16:08:39 -0700

[diff] [blame]

207

beeps

2013-04-10 20:56:09 -0700

[diff] [blame]

208

# In the experimental abort case both these clauses can evaluate

209

# to True.

Chris Masone

2012-05-21 16:08:39 -0700

[diff] [blame]

210

if std_job_hash in view['job_keyvals']:

211

job_name = view['job_keyvals'][std_job_hash]

beeps

2013-04-10 20:56:09 -0700

[diff] [blame]

212

if exp_job_hash in view['job_keyvals']:

Chris Masone

2012-05-21 16:08:39 -0700

[diff] [blame]

213

experimental = True

214

job_name = view['job_keyvals'][exp_job_hash]

Dan Shi

d152180

2013-05-24 13:08:37 -0700

[diff] [blame]

215

if raw_test_name_hash in view['job_keyvals']:

216

job_name = view['job_keyvals'][raw_test_name_hash]

Chris Masone

2012-05-21 16:08:39 -0700

[diff] [blame]

217

beeps

2013-04-10 20:56:09 -0700

[diff] [blame]

218

# If the name being returned is the test name it needs to include the tag

219

return job_name, experimental, std_job_name if not test_name else test_name

Zdenek Behan

2012-04-06 17:20:01 +0200

[diff] [blame]

220

221

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

222

class LogLink(object):

223

"""

beeps

2013-04-26 19:12:46 -0700

[diff] [blame]

224

Link to a log. Since we create bugs for failures the

225

link text will include a link to the bug too.

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

226

227

@var anchor: the link text.

228

@var url: the link url.

229

"""

beeps

2013-04-26 19:12:46 -0700

[diff] [blame]

230

def __init__(self, anchor, job_string, bug_id=None):

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

231

"""

232

Initialize the LogLink by generating the log URL.

233

234

@param anchor: the link text.

235

@param job_string: the job whose logs we'd like to link to.

beeps

2013-04-26 19:12:46 -0700

[diff] [blame]

236

@param bug_id: the bug id, if one was filed for this failure.

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

237

"""

238

self.anchor = anchor

239

host = CONFIG.get_config_value('SERVER', 'hostname', type=str)

240

pattern = CONFIG.get_config_value('CROS', 'log_url_pattern', type=str)

241

self.url = pattern % (host, job_string)

beeps

2013-04-26 19:12:46 -0700

[diff] [blame]

self.bug_id = bug_id

beeps

2013-05-08 21:49:48 -0700

[diff] [blame]

245

def GenerateBugLink(self):

beeps

2013-04-26 19:12:46 -0700

[diff] [blame]

246

"""

247

@return: A plain text link to the bug filed, if any.

248

"""

249

if self.bug_id:

250

crbug_url = CONFIG.get_config_value('BUG_REPORTING', 'tracker_url')

beeps

c4fb147

2013-05-08 21:49:48 -0700

[diff] [blame]

251

return '%s%s'% (crbug_url, self.bug_id)

beeps

2013-04-26 19:12:46 -0700

[diff] [blame]

252

return ''

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

253

254

255

def GenerateBuildbotLink(self):

256

"""

257

Generate a link to the job's logs, for consumption by buildbot.

258

259

@return A link formatted for the buildbot log annotator.

260

"""

beeps

c4fb147

2013-05-08 21:49:48 -0700

[diff] [blame]

261

bug_link = self.GenerateBugLink()

262

url = bug_link if bug_link else self.url

263

return "@@@STEP_LINK@%s@%s@@@"% (self.anchor.strip(), url)

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

264

265

Craig Harrison

2012-08-31 10:29:33 -0700

[diff] [blame]

266

def GenerateTextLink(self):

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

267

"""

Craig Harrison

2012-08-31 10:29:33 -0700

[diff] [blame]

268

Generate a link to the job's logs, for consumption by a human.

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

269

Craig Harrison

2012-08-31 10:29:33 -0700

[diff] [blame]

270

@return A link formatted for human readability.

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

271

"""

Craig Harrison

2012-08-31 10:29:33 -0700

[diff] [blame]

272

return "%s%s" % (self.anchor, self.url)

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

273

274

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

275

class Timings(object):

276

"""Timings for important events during a suite.

277

278

All timestamps are datetime.datetime objects.

279

280

@var suite_start_time: the time the suite started.

281

@var reimage_start_time: the time we started reimaging devices.

282

@var reimage_end_time: the time we finished reimaging devices.

283

@var tests_start_time: the time the first test started running.

284

"""

beeps

2013-03-22 13:15:49 -0700

[diff] [blame]

285

286

# Recorded in create_suite_job as we're staging the components of a

287

# build on the devserver. Only the artifacts necessary to start

288

# installing images onto DUT's will be staged when we record

289

# payload_end_time, the remaining artifacts are downloaded after we kick

290

# off the reimaging job, at which point we record artifact_end_time.

Chris Masone

2012-05-01 16:52:31 -0700

[diff] [blame]

291

download_start_time = None

292

payload_end_time = None

293

artifact_end_time = None

beeps

2013-03-22 13:15:49 -0700

[diff] [blame]

294

295

# The test_start_time, but taken off the view that corresponds to the

296

# suite instead of an individual test.

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

297

suite_start_time = None

beeps

2013-03-22 13:15:49 -0700

[diff] [blame]

298

299

# reimaging_times is a dictionary mapping a host name to it's start and

300

# end reimage timings. RecordTiming is invoked with test views, that

301

# correspond to tests in a suite; these tests might've run across

302

# different hosts. Each view that RecordTiming is invoked with creates a

303

# new entry in reimaging_times; When it's time to log reimage timings we

304

# iterate over this dict and create a reimaging_info string. This is a

305

# one time operation and only happens after all the TestViews in a suite

306

# are added to the reimaging_times dictionary.

307

# reimaging_times eg: {'hostname': (start_time, end_time)}

308

reimage_times = {}

309

310

# Earliest and Latest tests in the set of TestViews passed to us.

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

311

tests_start_time = None

312

tests_end_time = None

313

314

beeps

2013-03-22 13:15:49 -0700

[diff] [blame]

315

def _GetDatetime(self, timing_string, timing_string_format):

316

"""

317

Formats the timing_string according to the timing_string_format.

318

319

@param timing_string: A datetime timing string.

320

@param timing_string_format: Format of the time in timing_string.

321

@return: A datetime object for the given timing string.

322

"""

323

try:

324

return datetime.strptime(timing_string, timing_string_format)

except TypeError:

return None

Chris Masone

2012-08-29 10:37:08 -0700

[diff] [blame]

329

def RecordTiming(self, view):

330

"""Given a test report view, extract and record pertinent time info.

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

331

332

get_detailed_test_views() returns a list of entries that provide

333

info about the various parts of a suite run. This method can take

334

any one of these entries and look up timestamp info we might want

335

and record it.

336

Chris Masone

2012-09-06 16:00:07 -0700

[diff] [blame]

337

If timestamps are unavailable, datetime.datetime.min/max will be used.

338

Chris Masone

2012-08-29 10:37:08 -0700

[diff] [blame]

339

@param view: a view dict, as returned by get_detailed_test_views().

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

340

"""

Chris Masone

2012-09-06 16:00:07 -0700

[diff] [blame]

341

start_candidate = datetime.min

342

end_candidate = datetime.max

343

if view['test_started_time']:

344

start_candidate = datetime.strptime(view['test_started_time'],

345

job_status.TIME_FMT)

346

if view['test_finished_time']:

347

end_candidate = datetime.strptime(view['test_finished_time'],

348

job_status.TIME_FMT)

349

Chris Masone

2012-08-29 10:37:08 -0700

[diff] [blame]

350

if job_status.view_is_for_suite_prep(view):

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

351

self.suite_start_time = start_candidate

Chris Masone

2012-08-29 10:37:08 -0700

[diff] [blame]

352

elif view['test_name'].startswith(Reimager.JOB_NAME):

353

if '-' in view['test_name']:

354

hostname = view['test_name'].split('-', 1)[1]

Chris Masone

f76ebba

2012-06-28 14:56:22 -0700

[diff] [blame]

355

else:

356

hostname = ''

Chris Masone

2012-06-28 08:45:30 -0700

[diff] [blame]

357

self.reimage_times[hostname] = (start_candidate, end_candidate)

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

358

else:

359

self._UpdateFirstTestStartTime(start_candidate)

360

self._UpdateLastTestEndTime(end_candidate)

Chris Masone

2012-08-29 10:37:08 -0700

[diff] [blame]

361

if 'job_keyvals' in view:

362

keyvals = view['job_keyvals']

beeps

2013-03-22 13:15:49 -0700

[diff] [blame]

363

self.download_start_time = self._GetDatetime(

364

keyvals.get(constants.DOWNLOAD_STARTED_TIME),

365

job_status.TIME_FMT)

366

367

self.payload_end_time = self._GetDatetime(

368

keyvals.get(constants.PAYLOAD_FINISHED_TIME),

369

job_status.TIME_FMT)

370

371

self.artifact_end_time = self._GetDatetime(

372

keyvals.get(constants.ARTIFACT_FINISHED_TIME),

373

job_status.TIME_FMT)

Chris Masone

44e4d6c

2012-08-15 14:25:53 -0700

[diff] [blame]

374

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

375

376

def _UpdateFirstTestStartTime(self, candidate):

377

"""Update self.tests_start_time, iff candidate is an earlier time.

378

379

@param candidate: a datetime.datetime object.

380

"""

381

if not self.tests_start_time or candidate < self.tests_start_time:

382

self.tests_start_time = candidate

383

384

385

def _UpdateLastTestEndTime(self, candidate):

386

"""Update self.tests_end_time, iff candidate is a later time.

387

388

@param candidate: a datetime.datetime object.

389

"""

390

if not self.tests_end_time or candidate > self.tests_end_time:

391

self.tests_end_time = candidate

392

393

394

def __str__(self):

Chris Masone

2012-06-28 08:45:30 -0700

[diff] [blame]

395

reimaging_info = ''

396

for host, (start, end) in self.reimage_times.iteritems():

397

reimaging_info += ('Reimaging %s started at %s\n'

398

'Reimaging %s ended at %s\n' % (host, start,

399

host, end))

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

400

return ('\n'

401

'Suite timings:\n'

Chris Masone

2012-05-01 16:52:31 -0700

[diff] [blame]

402

'Downloads started at %s\n'

403

'Payload downloads ended at %s\n'

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

404

'Suite started at %s\n'

Chris Masone

2012-06-28 08:45:30 -0700

[diff] [blame]

405

'%s'

Chris Masone

2012-05-01 16:52:31 -0700

[diff] [blame]

406

'Artifact downloads ended (at latest) at %s\n'

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

407

'Testing started at %s\n'

Chris Masone

2012-05-01 16:52:31 -0700

[diff] [blame]

408

'Testing ended at %s\n' % (self.download_start_time,

409

self.payload_end_time,

410

self.suite_start_time,

Chris Masone

2012-06-28 08:45:30 -0700

[diff] [blame]

411

reimaging_info,

Chris Masone

2012-05-01 16:52:31 -0700

[diff] [blame]

412

self.artifact_end_time,

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

413

self.tests_start_time,

414

self.tests_end_time))

415

416

beeps

2013-03-22 13:15:49 -0700

[diff] [blame]

417

def _GetDataKeyForStatsd(self, suite, build, board):

418

"""

419

Constructs the key used for logging statsd timing data.

420

421

@param suite: scheduled suite that we want to record the results of.

422

@param build: The build string. This string should have a consistent

423

format eg: x86-mario-release/R26-3570.0.0. If the format of this

424

string changes such that we can't determine build_type or branch

425

we give up and use the parametes we're sure of instead (suite,

426

board). eg:

427

1. build = x86-alex-pgo-release/R26-3570.0.0

428

branch = 26

429

build_type = pgo-release

430

2. build = lumpy-paladin/R28-3993.0.0-rc5

431

branch = 28

432

build_type = paladin

433

@param board: The board that this suite ran on.

434

@return: The key used to log timing information in statsd.

435

"""

436

try:

437

_board, build_type, branch = base_event.ParseBuildName(build)[:3]

438

except base_event.ParseBuildNameException as e:

439

logging.error(str(e))

440

branch = 'Unknown'

441

build_type = 'Unknown'

442

else:

443

embedded_str = re.search(r'x86-\w+-(.*)', _board)

444

if embedded_str:

445

build_type = embedded_str.group(1) + '-' + build_type

data_key_dict = {

'board': board,

'branch': branch,

'build_type': build_type,

451

'suite': suite,

452

}

453

return ('run_suite.%(board)s.%(build_type)s.%(branch)s.%(suite)s'

% data_key_dict)

def SendResultsToStatsd(self, suite, build, board):

458

"""

459

Sends data to statsd.

460

461

1. Makes a data_key of the form: run_suite.$board.$branch.$suite

462

eg: stats/gauges/<hostname>/run_suite/<board>/<branch>/<suite>/

463

2. Computes timings for several start and end event pairs.

464

3. Computes timings for reimage events for all hosts.

465

4. Sends all timing values to statsd.

466

467

@param suite: scheduled suite that we want to record the results of.

468

@param build: the build that this suite ran on.

469

eg: 'lumpy-release/R26-3570.0.0'

470

@param board: the board that this suite ran on.

471

"""

472

if sys.version_info < (2, 7):

473

logging.error('Sending run_suite perf data to statsd requires'

474

'python 2.7 or greater.')

475

return

476

477

data_key = self._GetDataKeyForStatsd(suite, build, board)

478

479

# Since we don't want to try subtracting corrupted datetime values

480

# we catch TypeErrors in _GetDatetime and insert None instead. This

481

# means that even if, say, keyvals.get(constants.ARTIFACT_FINISHED_TIME)

482

# returns a corrupt value the member artifact_end_time is set to None.

483

if self.download_start_time:

484

if self.payload_end_time:

485

stats.Timer(data_key).send('payload_download_time',

486

(self.payload_end_time -

487

self.download_start_time).total_seconds())

488

489

if self.artifact_end_time:

490

stats.Timer(data_key).send('artifact_download_time',

491

(self.artifact_end_time -

492

self.download_start_time).total_seconds())

493

494

if self.tests_end_time:

495

if self.suite_start_time:

496

stats.Timer(data_key).send('suite_run_time',

497

(self.tests_end_time -

498

self.suite_start_time).total_seconds())

499

500

if self.tests_start_time:

501

stats.Timer(data_key).send('tests_run_time',

502

(self.tests_end_time -

503

self.tests_start_time).total_seconds())

504

505

# The reimage times are the start and end times of the reimage

506

# job (taken directly from the view), converted to datetime objects.

507

# If the reimage job never ran we won't enter the loop and if it didn't

508

# finish for some reason the start and end times are still initialized

509

# to valid datetimes.

510

for host, (start, end) in self.reimage_times.iteritems():

511

if start and end:

512

stats.Timer(data_key).send(host.replace('.', '_'),

513

(end - start).total_seconds())

514

515

beeps

2013-04-10 20:56:09 -0700

[diff] [blame]

516

def _full_test_name(job_id, view, build, suite):

517

"""

518

Generates the full test name for printing to logs and generating a link to

519

the results.

Craig Harrison

2012-08-31 10:29:33 -0700

[diff] [blame]

520

521

@param job_id: the job id.

522

@param view: the view for which we are generating the name.

beeps

2013-04-10 20:56:09 -0700

[diff] [blame]

523

@param build: the build for this invocation of run_suite.

524

@param suite: the suite for this invocation of run_suite.

Craig Harrison

2012-08-31 10:29:33 -0700

[diff] [blame]

525

@return The test name, possibly with a descriptive prefix appended.

526

"""

beeps

2013-04-10 20:56:09 -0700

[diff] [blame]

527

experimental, test_name = get_view_info(job_id, view, build, suite)[1:]

528

529

# If an experimental test is aborted get_view_info returns a name which

530

# includes the prefix.

531

prefix = constants.EXPERIMENTAL_PREFIX if (experimental and

532

not test_name.startswith(constants.EXPERIMENTAL_PREFIX)) else ''

533

return prefix + test_name

Craig Harrison

2012-08-31 10:29:33 -0700

[diff] [blame]

534

535

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

536

def main():

Aviv Keshet

2013-03-21 16:38:31 -0700

[diff] [blame]

537

"""

538

Entry point for run_suite script.

539

"""

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

540

parser, options, args = parse_options()

Chris Masone

3a85064

2012-07-11 11:11:18 -0700

[diff] [blame]

541

log_name = 'run_suite-default.log'

Chris Masone

2012-04-11 11:36:48 -0700

[diff] [blame]

542

if not options.mock_job_id:

Zdenek Behan

77290c3

2012-06-26 17:39:47 +0200

[diff] [blame]

543

if args:

544

print 'Unknown arguments: ' + str(args)

545

parser.print_help()

546

return

547

if not options.build:

548

print 'Need to specify which build to use'

549

parser.print_help()

550

return

551

if not options.board:

552

print 'Need to specify board'

parser.print_help()

return

if not options.name:

print 'Need to specify suite name'

Chris Masone

2012-04-11 11:36:48 -0700

[diff] [blame]

557

parser.print_help()

558

return

Chris Masone

3a85064

2012-07-11 11:11:18 -0700

[diff] [blame]

559

# convert build name from containing / to containing only _

560

log_name = 'run_suite-%s.log' % options.build.replace('/', '_')

561

log_dir = os.path.join(common.autotest_dir, 'logs')

562

if os.path.exists(log_dir):

563

log_name = os.path.join(log_dir, log_name)

Alex Miller

8e75d0d

2012-07-31 15:13:32 -0700

[diff] [blame]

564

if options.num is not None and options.num < 1:

565

print 'Number of machines must be more than 0, if specified.'

566

parser.print_help()

567

return

Alex Miller

2012-10-03 12:56:02 -0700

[diff] [blame]

568

if options.no_wait != 'True' and options.no_wait != 'False':

569

print 'Please specify "True" or "False" for --no_wait.'

570

parser.print_help()

571

return

572

if options.file_bugs != 'True' and options.file_bugs != 'False':

573

print 'Please specify "True" or "False" for --file_bugs.'

574

parser.print_help()

575

return

Scott Zawalski

2012-07-02 18:45:07 -0400

[diff] [blame]

576

setup_logging(logfile=log_name)

Chris Masone

dfa0beba

2012-03-19 11:41:47 -0700

[diff] [blame]

577

Simran Basi

2012-12-07 16:37:09 -0800

[diff] [blame]

578

try:

Dan Shi

a02181f

2013-01-29 14:03:32 -0800

[diff] [blame]

579

if not options.bypass_labstatus:

580

utils.check_lab_status(options.board)

Simran Basi

41bfae4

2013-01-09 10:50:47 -0800

[diff] [blame]

581

except (error.LabIsDownException, error.BoardIsDisabledException) as e:

582

logging.debug('Error Message: %s', e)

Simran Basi

2012-12-07 16:37:09 -0800

[diff] [blame]

583

print str(e)

Aviv Keshet

2013-03-21 16:38:31 -0700

[diff] [blame]

584

return RETURN_CODES.WARNING

Simran Basi

2012-12-07 16:37:09 -0800

[diff] [blame]

585

Chris Masone

8ac6671

2012-02-15 14:21:02 -0800

[diff] [blame]

586

afe = frontend_wrappers.RetryingAFE(timeout_min=options.timeout_min,

587

delay_sec=options.delay_sec)

Chris Masone

359c0fd

2012-03-13 15:18:59 -0700

[diff] [blame]

588

Alex Miller

2012-10-03 12:56:02 -0700

[diff] [blame]

589

wait = (options.no_wait == 'False')

590

file_bugs = (options.file_bugs == 'True')

Chris Masone

2012-04-11 11:36:48 -0700

[diff] [blame]

591

if options.mock_job_id:

592

job_id = int(options.mock_job_id)

593

else:

Alex Miller

8e75d0d

2012-07-31 15:13:32 -0700

[diff] [blame]

594

job_id = afe.run('create_suite_job', suite_name=options.name,

595

board=options.board, build=options.build,

Alex Miller

f43d0eb

2012-10-01 13:43:13 -0700

[diff] [blame]

596

check_hosts=wait, pool=options.pool, num=options.num,

597

file_bugs=file_bugs)

Chris Masone

8ac6671

2012-02-15 14:21:02 -0800

[diff] [blame]

598

TKO = frontend_wrappers.RetryingTKO(timeout_min=options.timeout_min,

599

delay_sec=options.delay_sec)

Scott Zawalski

2012-07-02 18:45:07 -0400

[diff] [blame]

600

logging.info('Started suite job: %s', job_id)

Simran Basi

2012-12-07 16:37:09 -0800

[diff] [blame]

601

Aviv Keshet

2013-03-21 16:38:31 -0700

[diff] [blame]

602

code = RETURN_CODES.OK

Chris Masone

359c0fd

2012-03-13 15:18:59 -0700

[diff] [blame]

603

while wait and True:

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

604

if not afe.get_jobs(id=job_id, finished=True):

605

time.sleep(1)

606

continue

Scott Zawalski

2012-03-06 09:21:44 -0500

[diff] [blame]

607

views = TKO.run('get_detailed_test_views', afe_job_id=job_id)

beeps

2013-04-10 20:56:09 -0700

[diff] [blame]

608

width = max((len(_full_test_name(job_id, view, options.build,

609

options.name)) for view in views)) + 3

Scott Zawalski

2012-03-06 09:21:44 -0500

[diff] [blame]

610

Chris Masone

2012-08-29 10:37:08 -0700

[diff] [blame]

611

relevant_views = filter(job_status.view_is_relevant, views)

Scott Zawalski

2012-03-06 09:21:44 -0500

[diff] [blame]

612

if not relevant_views:

Zdenek Behan

2012-04-06 17:20:01 +0200

[diff] [blame]

613

# The main suite job most likely failed in SERVER_JOB.

614

relevant_views = views

Scott Zawalski

2012-03-06 09:21:44 -0500

[diff] [blame]

615

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

616

timings = Timings()

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

617

web_links = []

618

buildbot_links = []

Chris Masone

2012-08-29 10:37:08 -0700

[diff] [blame]

619

for view in relevant_views:

620

timings.RecordTiming(view)

621

if job_status.view_is_for_suite_prep(view):

622

view['test_name'] = 'Suite prep'

Chris Masone

3a85064

2012-07-11 11:11:18 -0700

[diff] [blame]

623

beeps

2013-04-10 20:56:09 -0700

[diff] [blame]

624

job_name, experimental = get_view_info(job_id, view, options.build,

625

options.name)[:2]

626

test_view = _full_test_name(job_id, view, options.build,

627

options.name).ljust(width)

Chris Masone

2012-08-29 10:37:08 -0700

[diff] [blame]

628

logging.info("%s%s", test_view, get_pretty_status(view['status']))

beeps

2013-04-26 19:12:46 -0700

[diff] [blame]

629

630

# It's important that we:

631

# a. Use the test name in the view and not the name returned by

632

# full_test_name, as this was the name inserted after the test

633

# ran. Eg: for an aborted test full_test_name will return

634

# experimental_testname but the view and the bug_id keyval will

635

# contain /bulid/suite/experimental_testname.

636

# b. Apply the inverse function that was applied to record the bug

637

# id as a keyval in dynamic_suite, by replacing all '/' with '_'.

638

bug_id = view['job_keyvals'].get(

639

view['test_name'].replace('/', '_')+constants.BUG_KEYVAL)

640

641

link = LogLink(test_view, job_name, bug_id)

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

642

web_links.append(link)

Chris Masone

2012-08-29 10:37:08 -0700

[diff] [blame]

643

644

if view['status'] != 'GOOD':

645

logging.info("%s %s: %s", test_view, view['status'],

646

view['reason'])

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

647

# Don't show links on the buildbot waterfall for tests with

648

# GOOD status.

649

buildbot_links.append(link)

Chris Masone

8906ab1

2012-07-23 15:37:56 -0700

[diff] [blame]

650

if view['status'] == 'TEST_NA':

651

# Didn't run; nothing to do here!

652

continue

Aviv Keshet

2013-03-21 16:38:31 -0700

[diff] [blame]

653

if code == RETURN_CODES.ERROR:

Zdenek Behan

2012-04-06 17:20:01 +0200

[diff] [blame]

654

# Failed already, no need to worry further.

655

continue

beeps

2013-04-10 20:56:09 -0700

[diff] [blame]

656

657

# Any non experimental test that has a status other than WARN

658

# or GOOD will result in the tree closing. Experimental tests

659

# will not close the tree, even if they have been aborted.

Chris Masone

2012-08-29 10:37:08 -0700

[diff] [blame]

660

if (view['status'] == 'WARN' or

661

(is_fail_status(view['status']) and experimental)):

Zdenek Behan

2012-04-06 17:20:01 +0200

[diff] [blame]

662

# Failures that produce a warning. Either a test with WARN

663

# status or any experimental test failure.

Aviv Keshet

2013-03-21 16:38:31 -0700

[diff] [blame]

664

code = RETURN_CODES.WARNING

Chris Masone

5374c67

2012-03-05 15:11:39 -0800

[diff] [blame]

665

else:

Aviv Keshet

2013-03-21 16:38:31 -0700

[diff] [blame]

666

code = RETURN_CODES.ERROR

667

beeps

2013-03-22 13:15:49 -0700

[diff] [blame]

668

timings.SendResultsToStatsd(options.name, options.build, options.board)

Scott Zawalski

2012-07-02 18:45:07 -0400

[diff] [blame]

669

logging.info(timings)

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

670

logging.info('\n'

671

'Links to test logs:')

672

for link in web_links:

Craig Harrison

2012-08-31 10:29:33 -0700

[diff] [blame]

673

logging.info(link.GenerateTextLink())

Aviv Keshet

2013-03-21 16:38:31 -0700

[diff] [blame]

674

675

try:

676

returnmessage = RETURN_CODES.get_string(code)

677

except ValueError:

678

returnmessage = 'UNKNOWN'

679

logging.info('\n'

680

'Will return from run_suite with status: %s',

681

returnmessage)

682

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

683

logging.info('\n'

684

'Output below this line is for buildbot consumption:')

685

for link in buildbot_links:

686

logging.info(link.GenerateBuildbotLink())

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

687

break

Chris Masone

d5939fe

2012-03-13 10:11:06 -0700

[diff] [blame]

688

else:

Scott Zawalski

2012-07-02 18:45:07 -0400

[diff] [blame]

689

logging.info('Created suite job: %r', job_id)

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

690

link = LogLink(options.name, '%s-%s' % (job_id, getpass.getuser()))

691

logging.info(link.GenerateBuildbotLink())

Scott Zawalski

2012-07-02 18:45:07 -0400

[diff] [blame]

692

logging.info('--no_wait specified; Exiting.')

Chris Masone