Blame - site_utils/run_suite.py - platform/external/autotest

2012-02-14 14:18:01 -0800

[diff] [blame]

#!/usr/bin/python

#

# Use of this source code is governed by a BSD-style license that can be

5

# found in the LICENSE file.

6

7

"""Tool for running suites of tests and waiting for completion.

8

9

The desired test suite will be scheduled with autotest, and then

10

this tool will block until the job is complete, printing a summary

11

at the end. Error conditions result in exceptions.

12

13

This is intended for use only with Chrome OS test suits that leverage the

14

dynamic suite infrastructure in server/cros/dynamic_suite.py.

15

"""

16

beeps

2013-03-22 13:15:49 -0700

[diff] [blame]

17

import getpass, hashlib, logging, optparse, os, re, sys, time

Chris Masone

2012-09-06 16:00:07 -0700

[diff] [blame]

18

from datetime import datetime

19

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

20

import common

Chris Masone

2012-09-06 16:00:07 -0700

[diff] [blame]

21

Aviv Keshet

2013-03-21 16:38:31 -0700

[diff] [blame]

22

from autotest_lib.client.common_lib import global_config, error, utils, enum

Chris Masone

44e4d6c

2012-08-15 14:25:53 -0700

[diff] [blame]

23

from autotest_lib.server.cros.dynamic_suite import constants

Chris Masone

b493555

2012-08-14 12:05:54 -0700

[diff] [blame]

24

from autotest_lib.server.cros.dynamic_suite import frontend_wrappers

25

from autotest_lib.server.cros.dynamic_suite import job_status

Chris Masone

2012-08-29 10:37:08 -0700

[diff] [blame]

26

from autotest_lib.server.cros.dynamic_suite.reimager import Reimager

beeps

2013-03-22 13:15:49 -0700

[diff] [blame]

27

from autotest_lib.site_utils.suite_scheduler import base_event

28

from autotest_lib.site_utils.graphite import stats

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

29

Chris Masone

1120cdf

2012-02-27 17:35:07 -0800

[diff] [blame]

30

CONFIG = global_config.global_config

31

Simran Basi

2012-12-07 16:37:09 -0800

[diff] [blame]

32

# Return code that will be sent back to autotest_rpc_server.py

Aviv Keshet

2013-03-21 16:38:31 -0700

[diff] [blame]

33

RETURN_CODES = enum.Enum('OK', 'ERROR', 'WARNING')

Simran Basi

2012-12-07 16:37:09 -0800

[diff] [blame]

34

Chris Masone

dfa0beba

2012-03-19 11:41:47 -0700

[diff] [blame]

35

Scott Zawalski

2012-07-02 18:45:07 -0400

[diff] [blame]

36

def setup_logging(logfile=None):

37

"""Setup basic logging with all logging info stripped.

38

39

Calls to logging will only show the message. No severity is logged.

40

41

@param logfile: If specified dump output to a file as well.

42

"""

43

screen_handler = logging.StreamHandler()

44

screen_handler.setFormatter(logging.Formatter('%(message)s'))

45

logging.getLogger().addHandler(screen_handler)

46

logging.getLogger().setLevel(logging.INFO)

47

if logfile:

48

file_handler = logging.FileHandler(logfile)

49

file_handler.setLevel(logging.DEBUG)

50

logging.getLogger().addHandler(file_handler)

Chris Masone

dfa0beba

2012-03-19 11:41:47 -0700

[diff] [blame]

51

52

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

53

def parse_options():

Aviv Keshet

2013-03-21 16:38:31 -0700

[diff] [blame]

54

#pylint: disable-msg=C0111

Zdenek Behan

77290c3

2012-06-26 17:39:47 +0200

[diff] [blame]

55

usage = "usage: %prog [options]"

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

56

parser = optparse.OptionParser(usage=usage)

57

parser.add_option("-b", "--board", dest="board")

58

parser.add_option("-i", "--build", dest="build")

Chris Masone

359c0fd

2012-03-13 15:18:59 -0700

[diff] [blame]

59

# This should just be a boolean flag, but the autotest "proxy" code

60

# can't handle flags that don't take arguments.

Alex Miller

2012-10-03 12:56:02 -0700

[diff] [blame]

61

parser.add_option("-n", "--no_wait", dest="no_wait", default="False",

62

help='Must pass "True" or "False" if used.')

Scott Zawalski

6565017

2012-02-16 11:48:26 -0500

[diff] [blame]

63

parser.add_option("-p", "--pool", dest="pool", default=None)

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

64

parser.add_option("-s", "--suite_name", dest="name")

Chris Masone

8ac6671

2012-02-15 14:21:02 -0800

[diff] [blame]

65

parser.add_option("-t", "--timeout_min", dest="timeout_min", default=30)

66

parser.add_option("-d", "--delay_sec", dest="delay_sec", default=10)

Chris Masone

2012-04-11 11:36:48 -0700

[diff] [blame]

67

parser.add_option("-m", "--mock_job_id", dest="mock_job_id",

68

help="Skips running suite; creates report for given ID.")

Alex Miller

05a2fff

2012-09-10 10:14:34 -0700

[diff] [blame]

69

parser.add_option("-u", "--num", dest="num", type="int", default=None,

Chris Masone

8906ab1

2012-07-23 15:37:56 -0700

[diff] [blame]

70

help="Run on at most NUM machines.")

Alex Miller

f43d0eb

2012-10-01 13:43:13 -0700

[diff] [blame]

71

# Same boolean flag issue applies here.

Alex Miller

2012-10-03 12:56:02 -0700

[diff] [blame]

72

parser.add_option("-f", "--file_bugs", dest="file_bugs", default='False',

73

help='File bugs on test failures. Must pass "True" or '

74

'"False" if used.')

Dan Shi

a02181f

2013-01-29 14:03:32 -0800

[diff] [blame]

75

parser.add_option("-l", "--bypass_labstatus", dest="bypass_labstatus",

76

action="store_true", help='Bypass lab status check.')

Alex Miller

f43d0eb

2012-10-01 13:43:13 -0700

[diff] [blame]

77

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

78

options, args = parser.parse_args()

79

return parser, options, args

80

81

82

def get_pretty_status(status):

Aviv Keshet

2013-03-21 16:38:31 -0700

[diff] [blame]

83

"""

84

Converts a status string into a pretty-for-printing string.

85

86

@param status: Status to convert.

87

88

@return: Returns pretty string.

GOOD -> [ PASSED ]

TEST_NA -> [ INFO ]

other -> [ FAILED ]

"""

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

93

if status == 'GOOD':

94

return '[ PASSED ]'

Chris Masone

8906ab1

2012-07-23 15:37:56 -0700

[diff] [blame]

95

elif status == 'TEST_NA':

96

return '[ INFO ]'

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

97

return '[ FAILED ]'

98

Zdenek Behan

2012-04-06 17:20:01 +0200

[diff] [blame]

99

def is_fail_status(status):

Aviv Keshet

2013-03-21 16:38:31 -0700

[diff] [blame]

100

"""

101

Check if the given status corresponds to a failure.

102

103

@param status: The status to check. (string)

104

105

@return: True if status is FAIL or ERROR. False otherwise.

106

"""

Zdenek Behan

2012-04-06 17:20:01 +0200

[diff] [blame]

107

# All the statuses tests can have when they fail.

beeps

2013-04-10 20:56:09 -0700

[diff] [blame]

108

if status in ['FAIL', 'ERROR', 'ABORT']:

Zdenek Behan

2012-04-06 17:20:01 +0200

[diff] [blame]

return True

return False

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

112

beeps

2013-04-10 20:56:09 -0700

[diff] [blame]

113

def get_view_info(suite_job_id, view, build, suite):

Zdenek Behan

2012-04-06 17:20:01 +0200

[diff] [blame]

114

"""

115

Parse a view for the slave job name and job_id.

116

117

@param suite_job_id: The job id of our master suite job.

118

@param view: Test result view.

beeps

2013-04-10 20:56:09 -0700

[diff] [blame]

119

@param build: build passed in via the -b option.

120

eg: lumpy-release/R28-3947.0.0

121

@param suite: suite passed in via the -s option.

122

eg: dummy

123

@return A tuple job_name, experimental, name of the slave test run

124

described by view. eg:

125

experimental_dummy_Pass fails: (1130-owner, True, dummy_Pass)

126

experimental_dummy_Pass aborts: (1130-owner, True,

127

experimental_dummy_Pass)

128

dummy_Fail: (1130-owner, False, dummy_Fail.Error)

Zdenek Behan

2012-04-06 17:20:01 +0200

[diff] [blame]

129

"""

130

# By default, we are the main suite job since there is no

131

# keyval entry for our job_name.

132

job_name = '%s-%s' % (suite_job_id, getpass.getuser())

133

experimental = False

beeps

2013-04-10 20:56:09 -0700

[diff] [blame]

134

test_name = ''

Zdenek Behan

2012-04-06 17:20:01 +0200

[diff] [blame]

135

if 'job_keyvals' in view:

beeps

2013-04-10 20:56:09 -0700

[diff] [blame]

136

# For a test invocation like:

137

# NAME = "dummy_Fail"

138

# job.run_test('dummy_Fail', tag='Error', to_throw='TestError')

139

# we will:

140

# Record a keyval of the jobs test_name field: dummy_Fail

141

# On success, yield a tko status with the tagged name:

142

# dummy_Fail.Error

143

# On abort, yield a status (not from tko) with the job name:

144

# /build/suite/dummy_Fail.Error

145

# Note the last 2 options include the tag. The tag is seperated

146

# from the rest of the name with a '.'. The tag or test name can

147

# also include a /, and we must isolate the tag before we compare it

148

# to the hashed keyval. Based on this we have the following cases:

149

# 1. Regular test failure with or without a tag '.': std_job_name is

150

# set to the view test_name, after removing the tag.

151

# 2. Regular test Aborts: we know that dynamic_suite inserted a name

152

# like build/suite/test.name (eg:

153

# lumpy-release/R28-3947.0.0/dummy/dummy_Fail.Error), so we

154

# intersect the build/suite/ string we already have with the

155

# test_name in the view. The name of the aborted test is

156

# instrumental in generating the job_name, which is used in

157

# creating a link to the logs.

158

# 3. Experimental tests, Aborts and Failures: The test view

159

# corresponding to the afe_job_id of the suite job contains

160

# stubs for each test in this suite. The names of these jobs

161

# will contain an experimental prefix if they were aborted;

162

# If they failed the same names will not contain an experimental

163

# prefix but we would have hashed the name with a prefix. Eg:

164

# Test name = experimental_pass

165

# keyval contains: hash(experimental_pass)

166

# Fail/Pass view['test_name'] = pass

167

# Abort view['test_name'] = board/build/experimental_pass

168

# So we need to add the experimental prefix only if the test was

169

# aborted. Everything else is the same as [2].

170

# 4. Experimental server job failures: eg verify passes, something on

171

# the DUT crashes, the experimental server job fails to ssh in. We

172

# need to manually set the experimental flag in this case because the

173

# server job name isn't recorded in the keyvals. For a normal suite

174

# the views will contain: SERVER_JOB, try_new_image, test_name. i.e

175

# the test server jobs should be handled transparently and only the

176

# suite server job should appear in the view. If a server job fails

177

# (for an experimental test or otherwise) we insert the server job

178

# entry into the tko database instead. Put more generally we insert

179

# the last stage we knew about into the db record associated with

180

# that suites afe_job_id. This could lead to a view containing:

181

# SERVER_JOB, try_new_image,

182

# lumpy-release/R28-4008.0.0/bvt/experimental_pass_SERVER_JOB.

Chris Masone

2012-08-29 10:37:08 -0700

[diff] [blame]

183

if view['test_name'].startswith(Reimager.JOB_NAME):

184

std_job_name = Reimager.JOB_NAME

Chris Masone

2012-08-29 10:37:08 -0700

[diff] [blame]

185

else:

beeps

2013-04-10 20:56:09 -0700

[diff] [blame]

186

# Neither of these operations will stomp on a pristine string.

187

test_name = view['test_name'].replace('%s/%s/'% (build, suite), '')

188

std_job_name = test_name.split('.')[0]

Chris Masone

2012-05-21 16:08:39 -0700

[diff] [blame]

189

beeps

2013-04-10 20:56:09 -0700

[diff] [blame]

190

if (job_status.view_is_for_infrastructure_fail(view) and

191

std_job_name.startswith(constants.EXPERIMENTAL_PREFIX)):

192

experimental = True

193

194

if std_job_name.startswith(constants.EXPERIMENTAL_PREFIX):

195

exp_job_name = std_job_name

196

else:

197

exp_job_name = constants.EXPERIMENTAL_PREFIX + std_job_name

Chris Masone

2012-08-29 10:37:08 -0700

[diff] [blame]

198

std_job_hash = hashlib.md5(std_job_name).hexdigest()

199

exp_job_hash = hashlib.md5(exp_job_name).hexdigest()

Chris Masone

2012-05-21 16:08:39 -0700

[diff] [blame]

200

beeps

2013-04-10 20:56:09 -0700

[diff] [blame]

201

# In the experimental abort case both these clauses can evaluate

202

# to True.

Chris Masone

2012-05-21 16:08:39 -0700

[diff] [blame]

203

if std_job_hash in view['job_keyvals']:

204

job_name = view['job_keyvals'][std_job_hash]

beeps

2013-04-10 20:56:09 -0700

[diff] [blame]

205

if exp_job_hash in view['job_keyvals']:

Chris Masone

2012-05-21 16:08:39 -0700

[diff] [blame]

206

experimental = True

207

job_name = view['job_keyvals'][exp_job_hash]

208

beeps

2013-04-10 20:56:09 -0700

[diff] [blame]

209

# If the name being returned is the test name it needs to include the tag

210

return job_name, experimental, std_job_name if not test_name else test_name

Zdenek Behan

2012-04-06 17:20:01 +0200

[diff] [blame]

211

212

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

213

class LogLink(object):

214

"""

beeps

2013-04-26 19:12:46 -0700

[diff] [blame]

215

Link to a log. Since we create bugs for failures the

216

link text will include a link to the bug too.

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

217

218

@var anchor: the link text.

219

@var url: the link url.

220

"""

beeps

2013-04-26 19:12:46 -0700

[diff] [blame]

221

def __init__(self, anchor, job_string, bug_id=None):

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

222

"""

223

Initialize the LogLink by generating the log URL.

224

225

@param anchor: the link text.

226

@param job_string: the job whose logs we'd like to link to.

beeps

2013-04-26 19:12:46 -0700

[diff] [blame]

227

@param bug_id: the bug id, if one was filed for this failure.

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

228

"""

229

self.anchor = anchor

230

host = CONFIG.get_config_value('SERVER', 'hostname', type=str)

231

pattern = CONFIG.get_config_value('CROS', 'log_url_pattern', type=str)

232

self.url = pattern % (host, job_string)

beeps

2013-04-26 19:12:46 -0700

[diff] [blame]

self.bug_id = bug_id

beeps

2013-05-08 21:49:48 -0700

[diff] [blame^]

236

def GenerateBugLink(self):

beeps

2013-04-26 19:12:46 -0700

[diff] [blame]

237

"""

238

@return: A plain text link to the bug filed, if any.

239

"""

240

if self.bug_id:

241

crbug_url = CONFIG.get_config_value('BUG_REPORTING', 'tracker_url')

beeps

c4fb147

2013-05-08 21:49:48 -0700

[diff] [blame^]

242

return '%s%s'% (crbug_url, self.bug_id)

beeps

2013-04-26 19:12:46 -0700

[diff] [blame]

243

return ''

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

244

245

246

def GenerateBuildbotLink(self):

247

"""

248

Generate a link to the job's logs, for consumption by buildbot.

249

250

@return A link formatted for the buildbot log annotator.

251

"""

beeps

c4fb147

2013-05-08 21:49:48 -0700

[diff] [blame^]

252

bug_link = self.GenerateBugLink()

253

url = bug_link if bug_link else self.url

254

return "@@@STEP_LINK@%s@%s@@@"% (self.anchor.strip(), url)

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

255

256

Craig Harrison

2012-08-31 10:29:33 -0700

[diff] [blame]

257

def GenerateTextLink(self):

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

258

"""

Craig Harrison

2012-08-31 10:29:33 -0700

[diff] [blame]

259

Generate a link to the job's logs, for consumption by a human.

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

260

Craig Harrison

2012-08-31 10:29:33 -0700

[diff] [blame]

261

@return A link formatted for human readability.

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

262

"""

Craig Harrison

2012-08-31 10:29:33 -0700

[diff] [blame]

263

return "%s%s" % (self.anchor, self.url)

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

264

265

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

266

class Timings(object):

267

"""Timings for important events during a suite.

268

269

All timestamps are datetime.datetime objects.

270

271

@var suite_start_time: the time the suite started.

272

@var reimage_start_time: the time we started reimaging devices.

273

@var reimage_end_time: the time we finished reimaging devices.

274

@var tests_start_time: the time the first test started running.

275

"""

beeps

2013-03-22 13:15:49 -0700

[diff] [blame]

276

277

# Recorded in create_suite_job as we're staging the components of a

278

# build on the devserver. Only the artifacts necessary to start

279

# installing images onto DUT's will be staged when we record

280

# payload_end_time, the remaining artifacts are downloaded after we kick

281

# off the reimaging job, at which point we record artifact_end_time.

Chris Masone

2012-05-01 16:52:31 -0700

[diff] [blame]

282

download_start_time = None

283

payload_end_time = None

284

artifact_end_time = None

beeps

2013-03-22 13:15:49 -0700

[diff] [blame]

285

286

# The test_start_time, but taken off the view that corresponds to the

287

# suite instead of an individual test.

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

288

suite_start_time = None

beeps

2013-03-22 13:15:49 -0700

[diff] [blame]

289

290

# reimaging_times is a dictionary mapping a host name to it's start and

291

# end reimage timings. RecordTiming is invoked with test views, that

292

# correspond to tests in a suite; these tests might've run across

293

# different hosts. Each view that RecordTiming is invoked with creates a

294

# new entry in reimaging_times; When it's time to log reimage timings we

295

# iterate over this dict and create a reimaging_info string. This is a

296

# one time operation and only happens after all the TestViews in a suite

297

# are added to the reimaging_times dictionary.

298

# reimaging_times eg: {'hostname': (start_time, end_time)}

299

reimage_times = {}

300

301

# Earliest and Latest tests in the set of TestViews passed to us.

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

302

tests_start_time = None

303

tests_end_time = None

304

305

beeps

2013-03-22 13:15:49 -0700

[diff] [blame]

306

def _GetDatetime(self, timing_string, timing_string_format):

307

"""

308

Formats the timing_string according to the timing_string_format.

309

310

@param timing_string: A datetime timing string.

311

@param timing_string_format: Format of the time in timing_string.

312

@return: A datetime object for the given timing string.

313

"""

314

try:

315

return datetime.strptime(timing_string, timing_string_format)

except TypeError:

return None

Chris Masone

2012-08-29 10:37:08 -0700

[diff] [blame]

320

def RecordTiming(self, view):

321

"""Given a test report view, extract and record pertinent time info.

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

322

323

get_detailed_test_views() returns a list of entries that provide

324

info about the various parts of a suite run. This method can take

325

any one of these entries and look up timestamp info we might want

326

and record it.

327

Chris Masone

2012-09-06 16:00:07 -0700

[diff] [blame]

328

If timestamps are unavailable, datetime.datetime.min/max will be used.

329

Chris Masone

2012-08-29 10:37:08 -0700

[diff] [blame]

330

@param view: a view dict, as returned by get_detailed_test_views().

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

331

"""

Chris Masone

2012-09-06 16:00:07 -0700

[diff] [blame]

332

start_candidate = datetime.min

333

end_candidate = datetime.max

334

if view['test_started_time']:

335

start_candidate = datetime.strptime(view['test_started_time'],

336

job_status.TIME_FMT)

337

if view['test_finished_time']:

338

end_candidate = datetime.strptime(view['test_finished_time'],

339

job_status.TIME_FMT)

340

Chris Masone

2012-08-29 10:37:08 -0700

[diff] [blame]

341

if job_status.view_is_for_suite_prep(view):

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

342

self.suite_start_time = start_candidate

Chris Masone

2012-08-29 10:37:08 -0700

[diff] [blame]

343

elif view['test_name'].startswith(Reimager.JOB_NAME):

344

if '-' in view['test_name']:

345

hostname = view['test_name'].split('-', 1)[1]

Chris Masone

f76ebba

2012-06-28 14:56:22 -0700

[diff] [blame]

346

else:

347

hostname = ''

Chris Masone

2012-06-28 08:45:30 -0700

[diff] [blame]

348

self.reimage_times[hostname] = (start_candidate, end_candidate)

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

349

else:

350

self._UpdateFirstTestStartTime(start_candidate)

351

self._UpdateLastTestEndTime(end_candidate)

Chris Masone

2012-08-29 10:37:08 -0700

[diff] [blame]

352

if 'job_keyvals' in view:

353

keyvals = view['job_keyvals']

beeps

2013-03-22 13:15:49 -0700

[diff] [blame]

354

self.download_start_time = self._GetDatetime(

355

keyvals.get(constants.DOWNLOAD_STARTED_TIME),

356

job_status.TIME_FMT)

357

358

self.payload_end_time = self._GetDatetime(

359

keyvals.get(constants.PAYLOAD_FINISHED_TIME),

360

job_status.TIME_FMT)

361

362

self.artifact_end_time = self._GetDatetime(

363

keyvals.get(constants.ARTIFACT_FINISHED_TIME),

364

job_status.TIME_FMT)

Chris Masone

44e4d6c

2012-08-15 14:25:53 -0700

[diff] [blame]

365

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

366

367

def _UpdateFirstTestStartTime(self, candidate):

368

"""Update self.tests_start_time, iff candidate is an earlier time.

369

370

@param candidate: a datetime.datetime object.

371

"""

372

if not self.tests_start_time or candidate < self.tests_start_time:

373

self.tests_start_time = candidate

374

375

376

def _UpdateLastTestEndTime(self, candidate):

377

"""Update self.tests_end_time, iff candidate is a later time.

378

379

@param candidate: a datetime.datetime object.

380

"""

381

if not self.tests_end_time or candidate > self.tests_end_time:

382

self.tests_end_time = candidate

383

384

385

def __str__(self):

Chris Masone

2012-06-28 08:45:30 -0700

[diff] [blame]

386

reimaging_info = ''

387

for host, (start, end) in self.reimage_times.iteritems():

388

reimaging_info += ('Reimaging %s started at %s\n'

389

'Reimaging %s ended at %s\n' % (host, start,

390

host, end))

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

391

return ('\n'

392

'Suite timings:\n'

Chris Masone

2012-05-01 16:52:31 -0700

[diff] [blame]

393

'Downloads started at %s\n'

394

'Payload downloads ended at %s\n'

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

395

'Suite started at %s\n'

Chris Masone

2012-06-28 08:45:30 -0700

[diff] [blame]

396

'%s'

Chris Masone

2012-05-01 16:52:31 -0700

[diff] [blame]

397

'Artifact downloads ended (at latest) at %s\n'

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

398

'Testing started at %s\n'

Chris Masone

2012-05-01 16:52:31 -0700

[diff] [blame]

399

'Testing ended at %s\n' % (self.download_start_time,

400

self.payload_end_time,

401

self.suite_start_time,

Chris Masone

2012-06-28 08:45:30 -0700

[diff] [blame]

402

reimaging_info,

Chris Masone

2012-05-01 16:52:31 -0700

[diff] [blame]

403

self.artifact_end_time,

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

404

self.tests_start_time,

405

self.tests_end_time))

406

407

beeps

2013-03-22 13:15:49 -0700

[diff] [blame]

408

def _GetDataKeyForStatsd(self, suite, build, board):

409

"""

410

Constructs the key used for logging statsd timing data.

411

412

@param suite: scheduled suite that we want to record the results of.

413

@param build: The build string. This string should have a consistent

414

format eg: x86-mario-release/R26-3570.0.0. If the format of this

415

string changes such that we can't determine build_type or branch

416

we give up and use the parametes we're sure of instead (suite,

417

board). eg:

418

1. build = x86-alex-pgo-release/R26-3570.0.0

419

branch = 26

420

build_type = pgo-release

421

2. build = lumpy-paladin/R28-3993.0.0-rc5

422

branch = 28

423

build_type = paladin

424

@param board: The board that this suite ran on.

425

@return: The key used to log timing information in statsd.

426

"""

427

try:

428

_board, build_type, branch = base_event.ParseBuildName(build)[:3]

429

except base_event.ParseBuildNameException as e:

430

logging.error(str(e))

431

branch = 'Unknown'

432

build_type = 'Unknown'

433

else:

434

embedded_str = re.search(r'x86-\w+-(.*)', _board)

435

if embedded_str:

436

build_type = embedded_str.group(1) + '-' + build_type

data_key_dict = {

'board': board,

'branch': branch,

'build_type': build_type,

442

'suite': suite,

443

}

444

return ('run_suite.%(board)s.%(build_type)s.%(branch)s.%(suite)s'

% data_key_dict)

def SendResultsToStatsd(self, suite, build, board):

449

"""

450

Sends data to statsd.

451

452

1. Makes a data_key of the form: run_suite.$board.$branch.$suite

453

eg: stats/gauges/<hostname>/run_suite/<board>/<branch>/<suite>/

454

2. Computes timings for several start and end event pairs.

455

3. Computes timings for reimage events for all hosts.

456

4. Sends all timing values to statsd.

457

458

@param suite: scheduled suite that we want to record the results of.

459

@param build: the build that this suite ran on.

460

eg: 'lumpy-release/R26-3570.0.0'

461

@param board: the board that this suite ran on.

462

"""

463

if sys.version_info < (2, 7):

464

logging.error('Sending run_suite perf data to statsd requires'

465

'python 2.7 or greater.')

466

return

467

468

data_key = self._GetDataKeyForStatsd(suite, build, board)

469

470

# Since we don't want to try subtracting corrupted datetime values

471

# we catch TypeErrors in _GetDatetime and insert None instead. This

472

# means that even if, say, keyvals.get(constants.ARTIFACT_FINISHED_TIME)

473

# returns a corrupt value the member artifact_end_time is set to None.

474

if self.download_start_time:

475

if self.payload_end_time:

476

stats.Timer(data_key).send('payload_download_time',

477

(self.payload_end_time -

478

self.download_start_time).total_seconds())

479

480

if self.artifact_end_time:

481

stats.Timer(data_key).send('artifact_download_time',

482

(self.artifact_end_time -

483

self.download_start_time).total_seconds())

484

485

if self.tests_end_time:

486

if self.suite_start_time:

487

stats.Timer(data_key).send('suite_run_time',

488

(self.tests_end_time -

489

self.suite_start_time).total_seconds())

490

491

if self.tests_start_time:

492

stats.Timer(data_key).send('tests_run_time',

493

(self.tests_end_time -

494

self.tests_start_time).total_seconds())

495

496

# The reimage times are the start and end times of the reimage

497

# job (taken directly from the view), converted to datetime objects.

498

# If the reimage job never ran we won't enter the loop and if it didn't

499

# finish for some reason the start and end times are still initialized

500

# to valid datetimes.

501

for host, (start, end) in self.reimage_times.iteritems():

502

if start and end:

503

stats.Timer(data_key).send(host.replace('.', '_'),

504

(end - start).total_seconds())

505

506

beeps

2013-04-10 20:56:09 -0700

[diff] [blame]

507

def _full_test_name(job_id, view, build, suite):

508

"""

509

Generates the full test name for printing to logs and generating a link to

510

the results.

Craig Harrison

2012-08-31 10:29:33 -0700

[diff] [blame]

511

512

@param job_id: the job id.

513

@param view: the view for which we are generating the name.

beeps

2013-04-10 20:56:09 -0700

[diff] [blame]

514

@param build: the build for this invocation of run_suite.

515

@param suite: the suite for this invocation of run_suite.

Craig Harrison

2012-08-31 10:29:33 -0700

[diff] [blame]

516

@return The test name, possibly with a descriptive prefix appended.

517

"""

beeps

2013-04-10 20:56:09 -0700

[diff] [blame]

518

experimental, test_name = get_view_info(job_id, view, build, suite)[1:]

519

520

# If an experimental test is aborted get_view_info returns a name which

521

# includes the prefix.

522

prefix = constants.EXPERIMENTAL_PREFIX if (experimental and

523

not test_name.startswith(constants.EXPERIMENTAL_PREFIX)) else ''

524

return prefix + test_name

Craig Harrison

2012-08-31 10:29:33 -0700

[diff] [blame]

525

526

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

527

def main():

Aviv Keshet

2013-03-21 16:38:31 -0700

[diff] [blame]

528

"""

529

Entry point for run_suite script.

530

"""

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

531

parser, options, args = parse_options()

Chris Masone

3a85064

2012-07-11 11:11:18 -0700

[diff] [blame]

532

log_name = 'run_suite-default.log'

Chris Masone

2012-04-11 11:36:48 -0700

[diff] [blame]

533

if not options.mock_job_id:

Zdenek Behan

77290c3

2012-06-26 17:39:47 +0200

[diff] [blame]

534

if args:

535

print 'Unknown arguments: ' + str(args)

536

parser.print_help()

537

return

538

if not options.build:

539

print 'Need to specify which build to use'

540

parser.print_help()

541

return

542

if not options.board:

543

print 'Need to specify board'

parser.print_help()

return

if not options.name:

print 'Need to specify suite name'

Chris Masone

2012-04-11 11:36:48 -0700

[diff] [blame]

548

parser.print_help()

549

return

Chris Masone

3a85064

2012-07-11 11:11:18 -0700

[diff] [blame]

550

# convert build name from containing / to containing only _

551

log_name = 'run_suite-%s.log' % options.build.replace('/', '_')

552

log_dir = os.path.join(common.autotest_dir, 'logs')

553

if os.path.exists(log_dir):

554

log_name = os.path.join(log_dir, log_name)

Alex Miller

8e75d0d

2012-07-31 15:13:32 -0700

[diff] [blame]

555

if options.num is not None and options.num < 1:

556

print 'Number of machines must be more than 0, if specified.'

557

parser.print_help()

558

return

Alex Miller

2012-10-03 12:56:02 -0700

[diff] [blame]

559

if options.no_wait != 'True' and options.no_wait != 'False':

560

print 'Please specify "True" or "False" for --no_wait.'

561

parser.print_help()

562

return

563

if options.file_bugs != 'True' and options.file_bugs != 'False':

564

print 'Please specify "True" or "False" for --file_bugs.'

565

parser.print_help()

566

return

Scott Zawalski

2012-07-02 18:45:07 -0400

[diff] [blame]

567

setup_logging(logfile=log_name)

Chris Masone

dfa0beba

2012-03-19 11:41:47 -0700

[diff] [blame]

568

Simran Basi

2012-12-07 16:37:09 -0800

[diff] [blame]

569

try:

Dan Shi

a02181f

2013-01-29 14:03:32 -0800

[diff] [blame]

570

if not options.bypass_labstatus:

571

utils.check_lab_status(options.board)

Simran Basi

41bfae4

2013-01-09 10:50:47 -0800

[diff] [blame]

572

except (error.LabIsDownException, error.BoardIsDisabledException) as e:

573

logging.debug('Error Message: %s', e)

Simran Basi

2012-12-07 16:37:09 -0800

[diff] [blame]

574

print str(e)

Aviv Keshet

2013-03-21 16:38:31 -0700

[diff] [blame]

575

return RETURN_CODES.WARNING

Simran Basi

2012-12-07 16:37:09 -0800

[diff] [blame]

576

Chris Masone

8ac6671

2012-02-15 14:21:02 -0800

[diff] [blame]

577

afe = frontend_wrappers.RetryingAFE(timeout_min=options.timeout_min,

578

delay_sec=options.delay_sec)

Chris Masone

359c0fd

2012-03-13 15:18:59 -0700

[diff] [blame]

579

Alex Miller

2012-10-03 12:56:02 -0700

[diff] [blame]

580

wait = (options.no_wait == 'False')

581

file_bugs = (options.file_bugs == 'True')

Chris Masone

2012-04-11 11:36:48 -0700

[diff] [blame]

582

if options.mock_job_id:

583

job_id = int(options.mock_job_id)

584

else:

Alex Miller

8e75d0d

2012-07-31 15:13:32 -0700

[diff] [blame]

585

job_id = afe.run('create_suite_job', suite_name=options.name,

586

board=options.board, build=options.build,

Alex Miller

f43d0eb

2012-10-01 13:43:13 -0700

[diff] [blame]

587

check_hosts=wait, pool=options.pool, num=options.num,

588

file_bugs=file_bugs)

Chris Masone

8ac6671

2012-02-15 14:21:02 -0800

[diff] [blame]

589

TKO = frontend_wrappers.RetryingTKO(timeout_min=options.timeout_min,

590

delay_sec=options.delay_sec)

Scott Zawalski

2012-07-02 18:45:07 -0400

[diff] [blame]

591

logging.info('Started suite job: %s', job_id)

Simran Basi

2012-12-07 16:37:09 -0800

[diff] [blame]

592

Aviv Keshet

2013-03-21 16:38:31 -0700

[diff] [blame]

593

code = RETURN_CODES.OK

Chris Masone

359c0fd

2012-03-13 15:18:59 -0700

[diff] [blame]

594

while wait and True:

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

595

if not afe.get_jobs(id=job_id, finished=True):

596

time.sleep(1)

597

continue

Scott Zawalski

2012-03-06 09:21:44 -0500

[diff] [blame]

598

views = TKO.run('get_detailed_test_views', afe_job_id=job_id)

beeps

2013-04-10 20:56:09 -0700

[diff] [blame]

599

width = max((len(_full_test_name(job_id, view, options.build,

600

options.name)) for view in views)) + 3

Scott Zawalski

2012-03-06 09:21:44 -0500

[diff] [blame]

601

Chris Masone

2012-08-29 10:37:08 -0700

[diff] [blame]

602

relevant_views = filter(job_status.view_is_relevant, views)

Scott Zawalski

2012-03-06 09:21:44 -0500

[diff] [blame]

603

if not relevant_views:

Zdenek Behan

2012-04-06 17:20:01 +0200

[diff] [blame]

604

# The main suite job most likely failed in SERVER_JOB.

605

relevant_views = views

Scott Zawalski

2012-03-06 09:21:44 -0500

[diff] [blame]

606

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

607

timings = Timings()

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

608

web_links = []

609

buildbot_links = []

Chris Masone

2012-08-29 10:37:08 -0700

[diff] [blame]

610

for view in relevant_views:

611

timings.RecordTiming(view)

612

if job_status.view_is_for_suite_prep(view):

613

view['test_name'] = 'Suite prep'

Chris Masone

3a85064

2012-07-11 11:11:18 -0700

[diff] [blame]

614

beeps

2013-04-10 20:56:09 -0700

[diff] [blame]

615

job_name, experimental = get_view_info(job_id, view, options.build,

616

options.name)[:2]

617

test_view = _full_test_name(job_id, view, options.build,

618

options.name).ljust(width)

Chris Masone

2012-08-29 10:37:08 -0700

[diff] [blame]

619

logging.info("%s%s", test_view, get_pretty_status(view['status']))

beeps

2013-04-26 19:12:46 -0700

[diff] [blame]

620

621

# It's important that we:

622

# a. Use the test name in the view and not the name returned by

623

# full_test_name, as this was the name inserted after the test

624

# ran. Eg: for an aborted test full_test_name will return

625

# experimental_testname but the view and the bug_id keyval will

626

# contain /bulid/suite/experimental_testname.

627

# b. Apply the inverse function that was applied to record the bug

628

# id as a keyval in dynamic_suite, by replacing all '/' with '_'.

629

bug_id = view['job_keyvals'].get(

630

view['test_name'].replace('/', '_')+constants.BUG_KEYVAL)

631

632

link = LogLink(test_view, job_name, bug_id)

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

633

web_links.append(link)

Chris Masone

2012-08-29 10:37:08 -0700

[diff] [blame]

634

635

if view['status'] != 'GOOD':

636

logging.info("%s %s: %s", test_view, view['status'],

637

view['reason'])

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

638

# Don't show links on the buildbot waterfall for tests with

639

# GOOD status.

640

buildbot_links.append(link)

Chris Masone

8906ab1

2012-07-23 15:37:56 -0700

[diff] [blame]

641

if view['status'] == 'TEST_NA':

642

# Didn't run; nothing to do here!

643

continue

Aviv Keshet

2013-03-21 16:38:31 -0700

[diff] [blame]

644

if code == RETURN_CODES.ERROR:

Zdenek Behan

2012-04-06 17:20:01 +0200

[diff] [blame]

645

# Failed already, no need to worry further.

646

continue

beeps

2013-04-10 20:56:09 -0700

[diff] [blame]

647

648

# Any non experimental test that has a status other than WARN

649

# or GOOD will result in the tree closing. Experimental tests

650

# will not close the tree, even if they have been aborted.

Chris Masone

2012-08-29 10:37:08 -0700

[diff] [blame]

651

if (view['status'] == 'WARN' or

652

(is_fail_status(view['status']) and experimental)):

Zdenek Behan

2012-04-06 17:20:01 +0200

[diff] [blame]

653

# Failures that produce a warning. Either a test with WARN

654

# status or any experimental test failure.

Aviv Keshet

2013-03-21 16:38:31 -0700

[diff] [blame]

655

code = RETURN_CODES.WARNING

Chris Masone

5374c67

2012-03-05 15:11:39 -0800

[diff] [blame]

656

else:

Aviv Keshet

2013-03-21 16:38:31 -0700

[diff] [blame]

657

code = RETURN_CODES.ERROR

658

beeps

2013-03-22 13:15:49 -0700

[diff] [blame]

659

timings.SendResultsToStatsd(options.name, options.build, options.board)

Scott Zawalski

2012-07-02 18:45:07 -0400

[diff] [blame]

660

logging.info(timings)

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

661

logging.info('\n'

662

'Links to test logs:')

663

for link in web_links:

Craig Harrison

2012-08-31 10:29:33 -0700

[diff] [blame]

664

logging.info(link.GenerateTextLink())

Aviv Keshet

2013-03-21 16:38:31 -0700

[diff] [blame]

665

666

try:

667

returnmessage = RETURN_CODES.get_string(code)

668

except ValueError:

669

returnmessage = 'UNKNOWN'

670

logging.info('\n'

671

'Will return from run_suite with status: %s',

672

returnmessage)

673

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

674

logging.info('\n'

675

'Output below this line is for buildbot consumption:')

676

for link in buildbot_links:

677

logging.info(link.GenerateBuildbotLink())

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

678

break

Chris Masone

d5939fe

2012-03-13 10:11:06 -0700

[diff] [blame]

679

else:

Scott Zawalski

2012-07-02 18:45:07 -0400

[diff] [blame]

680

logging.info('Created suite job: %r', job_id)

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

681

link = LogLink(options.name, '%s-%s' % (job_id, getpass.getuser()))

682

logging.info(link.GenerateBuildbotLink())

Scott Zawalski

2012-07-02 18:45:07 -0400

[diff] [blame]

683

logging.info('--no_wait specified; Exiting.')

Chris Masone