Blame - site_utils/run_suite.py - platform/external/autotest

2012-02-14 14:18:01 -0800

[diff] [blame]

#!/usr/bin/python

#

# Use of this source code is governed by a BSD-style license that can be

5

# found in the LICENSE file.

6

7

"""Tool for running suites of tests and waiting for completion.

8

9

The desired test suite will be scheduled with autotest, and then

10

this tool will block until the job is complete, printing a summary

11

at the end. Error conditions result in exceptions.

12

13

This is intended for use only with Chrome OS test suits that leverage the

14

dynamic suite infrastructure in server/cros/dynamic_suite.py.

15

"""

16

beeps

2013-03-22 13:15:49 -0700

[diff] [blame]

17

import getpass, hashlib, logging, optparse, os, re, sys, time

Chris Masone

2012-09-06 16:00:07 -0700

[diff] [blame]

18

from datetime import datetime

19

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

20

import common

Chris Masone

2012-09-06 16:00:07 -0700

[diff] [blame]

21

Aviv Keshet

2013-03-21 16:38:31 -0700

[diff] [blame]

22

from autotest_lib.client.common_lib import global_config, error, utils, enum

Chris Masone

44e4d6c

2012-08-15 14:25:53 -0700

[diff] [blame]

23

from autotest_lib.server.cros.dynamic_suite import constants

Chris Masone

b493555

2012-08-14 12:05:54 -0700

[diff] [blame]

24

from autotest_lib.server.cros.dynamic_suite import frontend_wrappers

25

from autotest_lib.server.cros.dynamic_suite import job_status

Chris Masone

2012-08-29 10:37:08 -0700

[diff] [blame]

26

from autotest_lib.server.cros.dynamic_suite.reimager import Reimager

beeps

2013-03-22 13:15:49 -0700

[diff] [blame]

27

from autotest_lib.site_utils.suite_scheduler import base_event

28

from autotest_lib.site_utils.graphite import stats

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

29

Chris Masone

1120cdf

2012-02-27 17:35:07 -0800

[diff] [blame]

30

CONFIG = global_config.global_config

31

Simran Basi

2012-12-07 16:37:09 -0800

[diff] [blame]

32

# Return code that will be sent back to autotest_rpc_server.py

Aviv Keshet

2013-03-21 16:38:31 -0700

[diff] [blame]

33

RETURN_CODES = enum.Enum('OK', 'ERROR', 'WARNING')

Simran Basi

2012-12-07 16:37:09 -0800

[diff] [blame]

34

Chris Masone

dfa0beba

2012-03-19 11:41:47 -0700

[diff] [blame]

35

Scott Zawalski

2012-07-02 18:45:07 -0400

[diff] [blame]

36

def setup_logging(logfile=None):

37

"""Setup basic logging with all logging info stripped.

38

39

Calls to logging will only show the message. No severity is logged.

40

41

@param logfile: If specified dump output to a file as well.

42

"""

43

screen_handler = logging.StreamHandler()

44

screen_handler.setFormatter(logging.Formatter('%(message)s'))

45

logging.getLogger().addHandler(screen_handler)

46

logging.getLogger().setLevel(logging.INFO)

47

if logfile:

48

file_handler = logging.FileHandler(logfile)

49

file_handler.setLevel(logging.DEBUG)

50

logging.getLogger().addHandler(file_handler)

Chris Masone

dfa0beba

2012-03-19 11:41:47 -0700

[diff] [blame]

51

52

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

53

def parse_options():

Aviv Keshet

2013-03-21 16:38:31 -0700

[diff] [blame]

54

#pylint: disable-msg=C0111

Zdenek Behan

77290c3

2012-06-26 17:39:47 +0200

[diff] [blame]

55

usage = "usage: %prog [options]"

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

56

parser = optparse.OptionParser(usage=usage)

57

parser.add_option("-b", "--board", dest="board")

58

parser.add_option("-i", "--build", dest="build")

Chris Masone

359c0fd

2012-03-13 15:18:59 -0700

[diff] [blame]

59

# This should just be a boolean flag, but the autotest "proxy" code

60

# can't handle flags that don't take arguments.

Alex Miller

2012-10-03 12:56:02 -0700

[diff] [blame]

61

parser.add_option("-n", "--no_wait", dest="no_wait", default="False",

62

help='Must pass "True" or "False" if used.')

Scott Zawalski

6565017

2012-02-16 11:48:26 -0500

[diff] [blame]

63

parser.add_option("-p", "--pool", dest="pool", default=None)

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

64

parser.add_option("-s", "--suite_name", dest="name")

Chris Masone

8ac6671

2012-02-15 14:21:02 -0800

[diff] [blame]

65

parser.add_option("-t", "--timeout_min", dest="timeout_min", default=30)

66

parser.add_option("-d", "--delay_sec", dest="delay_sec", default=10)

Chris Masone

2012-04-11 11:36:48 -0700

[diff] [blame]

67

parser.add_option("-m", "--mock_job_id", dest="mock_job_id",

68

help="Skips running suite; creates report for given ID.")

Alex Miller

05a2fff

2012-09-10 10:14:34 -0700

[diff] [blame]

69

parser.add_option("-u", "--num", dest="num", type="int", default=None,

Chris Masone

8906ab1

2012-07-23 15:37:56 -0700

[diff] [blame]

70

help="Run on at most NUM machines.")

Alex Miller

f43d0eb

2012-10-01 13:43:13 -0700

[diff] [blame]

71

# Same boolean flag issue applies here.

Alex Miller

2012-10-03 12:56:02 -0700

[diff] [blame]

72

parser.add_option("-f", "--file_bugs", dest="file_bugs", default='False',

73

help='File bugs on test failures. Must pass "True" or '

74

'"False" if used.')

Dan Shi

a02181f

2013-01-29 14:03:32 -0800

[diff] [blame]

75

parser.add_option("-l", "--bypass_labstatus", dest="bypass_labstatus",

76

action="store_true", help='Bypass lab status check.')

Alex Miller

f43d0eb

2012-10-01 13:43:13 -0700

[diff] [blame]

77

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

78

options, args = parser.parse_args()

79

return parser, options, args

80

81

82

def get_pretty_status(status):

Aviv Keshet

2013-03-21 16:38:31 -0700

[diff] [blame]

83

"""

84

Converts a status string into a pretty-for-printing string.

85

86

@param status: Status to convert.

87

88

@return: Returns pretty string.

GOOD -> [ PASSED ]

TEST_NA -> [ INFO ]

other -> [ FAILED ]

"""

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

93

if status == 'GOOD':

94

return '[ PASSED ]'

Chris Masone

8906ab1

2012-07-23 15:37:56 -0700

[diff] [blame]

95

elif status == 'TEST_NA':

96

return '[ INFO ]'

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

97

return '[ FAILED ]'

98

Zdenek Behan

2012-04-06 17:20:01 +0200

[diff] [blame]

99

def is_fail_status(status):

Aviv Keshet

2013-03-21 16:38:31 -0700

[diff] [blame]

100

"""

101

Check if the given status corresponds to a failure.

102

103

@param status: The status to check. (string)

104

105

@return: True if status is FAIL or ERROR. False otherwise.

106

"""

Zdenek Behan

2012-04-06 17:20:01 +0200

[diff] [blame]

107

# All the statuses tests can have when they fail.

beeps

2013-04-10 20:56:09 -0700

[diff] [blame]

108

if status in ['FAIL', 'ERROR', 'ABORT']:

Zdenek Behan

2012-04-06 17:20:01 +0200

[diff] [blame]

return True

return False

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

112

beeps

2013-04-10 20:56:09 -0700

[diff] [blame]

113

def get_view_info(suite_job_id, view, build, suite):

Zdenek Behan

2012-04-06 17:20:01 +0200

[diff] [blame]

114

"""

115

Parse a view for the slave job name and job_id.

116

117

@param suite_job_id: The job id of our master suite job.

118

@param view: Test result view.

beeps

2013-04-10 20:56:09 -0700

[diff] [blame]

119

@param build: build passed in via the -b option.

120

eg: lumpy-release/R28-3947.0.0

121

@param suite: suite passed in via the -s option.

122

eg: dummy

123

@return A tuple job_name, experimental, name of the slave test run

124

described by view. eg:

125

experimental_dummy_Pass fails: (1130-owner, True, dummy_Pass)

126

experimental_dummy_Pass aborts: (1130-owner, True,

127

experimental_dummy_Pass)

128

dummy_Fail: (1130-owner, False, dummy_Fail.Error)

Zdenek Behan

2012-04-06 17:20:01 +0200

[diff] [blame]

129

"""

130

# By default, we are the main suite job since there is no

131

# keyval entry for our job_name.

132

job_name = '%s-%s' % (suite_job_id, getpass.getuser())

133

experimental = False

beeps

2013-04-10 20:56:09 -0700

[diff] [blame]

134

test_name = ''

Zdenek Behan

2012-04-06 17:20:01 +0200

[diff] [blame]

135

if 'job_keyvals' in view:

beeps

2013-04-10 20:56:09 -0700

[diff] [blame]

136

# For a test invocation like:

137

# NAME = "dummy_Fail"

138

# job.run_test('dummy_Fail', tag='Error', to_throw='TestError')

139

# we will:

140

# Record a keyval of the jobs test_name field: dummy_Fail

141

# On success, yield a tko status with the tagged name:

142

# dummy_Fail.Error

143

# On abort, yield a status (not from tko) with the job name:

144

# /build/suite/dummy_Fail.Error

145

# Note the last 2 options include the tag. The tag is seperated

146

# from the rest of the name with a '.'. The tag or test name can

147

# also include a /, and we must isolate the tag before we compare it

148

# to the hashed keyval. Based on this we have the following cases:

149

# 1. Regular test failure with or without a tag '.': std_job_name is

150

# set to the view test_name, after removing the tag.

151

# 2. Regular test Aborts: we know that dynamic_suite inserted a name

152

# like build/suite/test.name (eg:

153

# lumpy-release/R28-3947.0.0/dummy/dummy_Fail.Error), so we

154

# intersect the build/suite/ string we already have with the

155

# test_name in the view. The name of the aborted test is

156

# instrumental in generating the job_name, which is used in

157

# creating a link to the logs.

158

# 3. Experimental tests, Aborts and Failures: The test view

159

# corresponding to the afe_job_id of the suite job contains

160

# stubs for each test in this suite. The names of these jobs

161

# will contain an experimental prefix if they were aborted;

162

# If they failed the same names will not contain an experimental

163

# prefix but we would have hashed the name with a prefix. Eg:

164

# Test name = experimental_pass

165

# keyval contains: hash(experimental_pass)

166

# Fail/Pass view['test_name'] = pass

167

# Abort view['test_name'] = board/build/experimental_pass

168

# So we need to add the experimental prefix only if the test was

169

# aborted. Everything else is the same as [2].

170

# 4. Experimental server job failures: eg verify passes, something on

171

# the DUT crashes, the experimental server job fails to ssh in. We

172

# need to manually set the experimental flag in this case because the

173

# server job name isn't recorded in the keyvals. For a normal suite

174

# the views will contain: SERVER_JOB, try_new_image, test_name. i.e

175

# the test server jobs should be handled transparently and only the

176

# suite server job should appear in the view. If a server job fails

177

# (for an experimental test or otherwise) we insert the server job

178

# entry into the tko database instead. Put more generally we insert

179

# the last stage we knew about into the db record associated with

180

# that suites afe_job_id. This could lead to a view containing:

181

# SERVER_JOB, try_new_image,

182

# lumpy-release/R28-4008.0.0/bvt/experimental_pass_SERVER_JOB.

Chris Masone

2012-08-29 10:37:08 -0700

[diff] [blame]

183

if view['test_name'].startswith(Reimager.JOB_NAME):

184

std_job_name = Reimager.JOB_NAME

Chris Masone

2012-08-29 10:37:08 -0700

[diff] [blame]

185

else:

beeps

2013-04-10 20:56:09 -0700

[diff] [blame]

186

# Neither of these operations will stomp on a pristine string.

187

test_name = view['test_name'].replace('%s/%s/'% (build, suite), '')

188

std_job_name = test_name.split('.')[0]

Chris Masone

2012-05-21 16:08:39 -0700

[diff] [blame]

189

beeps

2013-04-10 20:56:09 -0700

[diff] [blame]

190

if (job_status.view_is_for_infrastructure_fail(view) and

191

std_job_name.startswith(constants.EXPERIMENTAL_PREFIX)):

192

experimental = True

193

194

if std_job_name.startswith(constants.EXPERIMENTAL_PREFIX):

195

exp_job_name = std_job_name

196

else:

197

exp_job_name = constants.EXPERIMENTAL_PREFIX + std_job_name

Chris Masone

2012-08-29 10:37:08 -0700

[diff] [blame]

198

std_job_hash = hashlib.md5(std_job_name).hexdigest()

199

exp_job_hash = hashlib.md5(exp_job_name).hexdigest()

Chris Masone

2012-05-21 16:08:39 -0700

[diff] [blame]

200

beeps

2013-04-10 20:56:09 -0700

[diff] [blame]

201

# In the experimental abort case both these clauses can evaluate

202

# to True.

Chris Masone

2012-05-21 16:08:39 -0700

[diff] [blame]

203

if std_job_hash in view['job_keyvals']:

204

job_name = view['job_keyvals'][std_job_hash]

beeps

2013-04-10 20:56:09 -0700

[diff] [blame]

205

if exp_job_hash in view['job_keyvals']:

Chris Masone

2012-05-21 16:08:39 -0700

[diff] [blame]

206

experimental = True

207

job_name = view['job_keyvals'][exp_job_hash]

208

beeps

2013-04-10 20:56:09 -0700

[diff] [blame]

209

# If the name being returned is the test name it needs to include the tag

210

return job_name, experimental, std_job_name if not test_name else test_name

Zdenek Behan

2012-04-06 17:20:01 +0200

[diff] [blame]

211

212

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

213

class LogLink(object):

214

"""

beeps

2013-04-26 19:12:46 -0700

[diff] [blame]

215

Link to a log. Since we create bugs for failures the

216

link text will include a link to the bug too.

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

217

218

@var anchor: the link text.

219

@var url: the link url.

220

"""

beeps

2013-04-26 19:12:46 -0700

[diff] [blame]

221

def __init__(self, anchor, job_string, bug_id=None):

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

222

"""

223

Initialize the LogLink by generating the log URL.

224

225

@param anchor: the link text.

226

@param job_string: the job whose logs we'd like to link to.

beeps

2013-04-26 19:12:46 -0700

[diff] [blame]

227

@param bug_id: the bug id, if one was filed for this failure.

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

228

"""

229

self.anchor = anchor

230

host = CONFIG.get_config_value('SERVER', 'hostname', type=str)

231

pattern = CONFIG.get_config_value('CROS', 'log_url_pattern', type=str)

232

self.url = pattern % (host, job_string)

beeps

2013-04-26 19:12:46 -0700

[diff] [blame]

self.bug_id = bug_id

def GenerateBugString(self):

237

"""

238

@return: A plain text link to the bug filed, if any.

239

"""

240

if self.bug_id:

241

crbug_url = CONFIG.get_config_value('BUG_REPORTING', 'tracker_url')

242

return ' (%s/%s filed)'% (crbug_url, self.bug_id)

243

return ''

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

244

245

246

def GenerateBuildbotLink(self):

247

"""

248

Generate a link to the job's logs, for consumption by buildbot.

249

250

@return A link formatted for the buildbot log annotator.

251

"""

beeps

2013-04-26 19:12:46 -0700

[diff] [blame]

252

anchor_string = self.anchor.strip() + self.GenerateBugString()

253

return "@@@STEP_LINK@%s@%s@@@"% (anchor_string, self.url)

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

254

255

Craig Harrison

2012-08-31 10:29:33 -0700

[diff] [blame]

256

def GenerateTextLink(self):

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

257

"""

Craig Harrison

2012-08-31 10:29:33 -0700

[diff] [blame]

258

Generate a link to the job's logs, for consumption by a human.

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

259

Craig Harrison

2012-08-31 10:29:33 -0700

[diff] [blame]

260

@return A link formatted for human readability.

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

261

"""

Craig Harrison

2012-08-31 10:29:33 -0700

[diff] [blame]

262

return "%s%s" % (self.anchor, self.url)

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

263

264

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

265

class Timings(object):

266

"""Timings for important events during a suite.

267

268

All timestamps are datetime.datetime objects.

269

270

@var suite_start_time: the time the suite started.

271

@var reimage_start_time: the time we started reimaging devices.

272

@var reimage_end_time: the time we finished reimaging devices.

273

@var tests_start_time: the time the first test started running.

274

"""

beeps

2013-03-22 13:15:49 -0700

[diff] [blame]

275

276

# Recorded in create_suite_job as we're staging the components of a

277

# build on the devserver. Only the artifacts necessary to start

278

# installing images onto DUT's will be staged when we record

279

# payload_end_time, the remaining artifacts are downloaded after we kick

280

# off the reimaging job, at which point we record artifact_end_time.

Chris Masone

2012-05-01 16:52:31 -0700

[diff] [blame]

281

download_start_time = None

282

payload_end_time = None

283

artifact_end_time = None

beeps

2013-03-22 13:15:49 -0700

[diff] [blame]

284

285

# The test_start_time, but taken off the view that corresponds to the

286

# suite instead of an individual test.

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

287

suite_start_time = None

beeps

2013-03-22 13:15:49 -0700

[diff] [blame]

288

289

# reimaging_times is a dictionary mapping a host name to it's start and

290

# end reimage timings. RecordTiming is invoked with test views, that

291

# correspond to tests in a suite; these tests might've run across

292

# different hosts. Each view that RecordTiming is invoked with creates a

293

# new entry in reimaging_times; When it's time to log reimage timings we

294

# iterate over this dict and create a reimaging_info string. This is a

295

# one time operation and only happens after all the TestViews in a suite

296

# are added to the reimaging_times dictionary.

297

# reimaging_times eg: {'hostname': (start_time, end_time)}

298

reimage_times = {}

299

300

# Earliest and Latest tests in the set of TestViews passed to us.

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

301

tests_start_time = None

302

tests_end_time = None

303

304

beeps

2013-03-22 13:15:49 -0700

[diff] [blame]

305

def _GetDatetime(self, timing_string, timing_string_format):

306

"""

307

Formats the timing_string according to the timing_string_format.

308

309

@param timing_string: A datetime timing string.

310

@param timing_string_format: Format of the time in timing_string.

311

@return: A datetime object for the given timing string.

312

"""

313

try:

314

return datetime.strptime(timing_string, timing_string_format)

except TypeError:

return None

Chris Masone

2012-08-29 10:37:08 -0700

[diff] [blame]

319

def RecordTiming(self, view):

320

"""Given a test report view, extract and record pertinent time info.

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

321

322

get_detailed_test_views() returns a list of entries that provide

323

info about the various parts of a suite run. This method can take

324

any one of these entries and look up timestamp info we might want

325

and record it.

326

Chris Masone

2012-09-06 16:00:07 -0700

[diff] [blame]

327

If timestamps are unavailable, datetime.datetime.min/max will be used.

328

Chris Masone

2012-08-29 10:37:08 -0700

[diff] [blame]

329

@param view: a view dict, as returned by get_detailed_test_views().

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

330

"""

Chris Masone

2012-09-06 16:00:07 -0700

[diff] [blame]

331

start_candidate = datetime.min

332

end_candidate = datetime.max

333

if view['test_started_time']:

334

start_candidate = datetime.strptime(view['test_started_time'],

335

job_status.TIME_FMT)

336

if view['test_finished_time']:

337

end_candidate = datetime.strptime(view['test_finished_time'],

338

job_status.TIME_FMT)

339

Chris Masone

2012-08-29 10:37:08 -0700

[diff] [blame]

340

if job_status.view_is_for_suite_prep(view):

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

341

self.suite_start_time = start_candidate

Chris Masone

2012-08-29 10:37:08 -0700

[diff] [blame]

342

elif view['test_name'].startswith(Reimager.JOB_NAME):

343

if '-' in view['test_name']:

344

hostname = view['test_name'].split('-', 1)[1]

Chris Masone

f76ebba

2012-06-28 14:56:22 -0700

[diff] [blame]

345

else:

346

hostname = ''

Chris Masone

2012-06-28 08:45:30 -0700

[diff] [blame]

347

self.reimage_times[hostname] = (start_candidate, end_candidate)

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

348

else:

349

self._UpdateFirstTestStartTime(start_candidate)

350

self._UpdateLastTestEndTime(end_candidate)

Chris Masone

2012-08-29 10:37:08 -0700

[diff] [blame]

351

if 'job_keyvals' in view:

352

keyvals = view['job_keyvals']

beeps

2013-03-22 13:15:49 -0700

[diff] [blame]

353

self.download_start_time = self._GetDatetime(

354

keyvals.get(constants.DOWNLOAD_STARTED_TIME),

355

job_status.TIME_FMT)

356

357

self.payload_end_time = self._GetDatetime(

358

keyvals.get(constants.PAYLOAD_FINISHED_TIME),

359

job_status.TIME_FMT)

360

361

self.artifact_end_time = self._GetDatetime(

362

keyvals.get(constants.ARTIFACT_FINISHED_TIME),

363

job_status.TIME_FMT)

Chris Masone

44e4d6c

2012-08-15 14:25:53 -0700

[diff] [blame]

364

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

365

366

def _UpdateFirstTestStartTime(self, candidate):

367

"""Update self.tests_start_time, iff candidate is an earlier time.

368

369

@param candidate: a datetime.datetime object.

370

"""

371

if not self.tests_start_time or candidate < self.tests_start_time:

372

self.tests_start_time = candidate

373

374

375

def _UpdateLastTestEndTime(self, candidate):

376

"""Update self.tests_end_time, iff candidate is a later time.

377

378

@param candidate: a datetime.datetime object.

379

"""

380

if not self.tests_end_time or candidate > self.tests_end_time:

381

self.tests_end_time = candidate

382

383

384

def __str__(self):

Chris Masone

2012-06-28 08:45:30 -0700

[diff] [blame]

385

reimaging_info = ''

386

for host, (start, end) in self.reimage_times.iteritems():

387

reimaging_info += ('Reimaging %s started at %s\n'

388

'Reimaging %s ended at %s\n' % (host, start,

389

host, end))

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

390

return ('\n'

391

'Suite timings:\n'

Chris Masone

2012-05-01 16:52:31 -0700

[diff] [blame]

392

'Downloads started at %s\n'

393

'Payload downloads ended at %s\n'

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

394

'Suite started at %s\n'

Chris Masone

2012-06-28 08:45:30 -0700

[diff] [blame]

395

'%s'

Chris Masone

2012-05-01 16:52:31 -0700

[diff] [blame]

396

'Artifact downloads ended (at latest) at %s\n'

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

397

'Testing started at %s\n'

Chris Masone

2012-05-01 16:52:31 -0700

[diff] [blame]

398

'Testing ended at %s\n' % (self.download_start_time,

399

self.payload_end_time,

400

self.suite_start_time,

Chris Masone

2012-06-28 08:45:30 -0700

[diff] [blame]

401

reimaging_info,

Chris Masone

2012-05-01 16:52:31 -0700

[diff] [blame]

402

self.artifact_end_time,

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

403

self.tests_start_time,

404

self.tests_end_time))

405

406

beeps

2013-03-22 13:15:49 -0700

[diff] [blame]

407

def _GetDataKeyForStatsd(self, suite, build, board):

408

"""

409

Constructs the key used for logging statsd timing data.

410

411

@param suite: scheduled suite that we want to record the results of.

412

@param build: The build string. This string should have a consistent

413

format eg: x86-mario-release/R26-3570.0.0. If the format of this

414

string changes such that we can't determine build_type or branch

415

we give up and use the parametes we're sure of instead (suite,

416

board). eg:

417

1. build = x86-alex-pgo-release/R26-3570.0.0

418

branch = 26

419

build_type = pgo-release

420

2. build = lumpy-paladin/R28-3993.0.0-rc5

421

branch = 28

422

build_type = paladin

423

@param board: The board that this suite ran on.

424

@return: The key used to log timing information in statsd.

425

"""

426

try:

427

_board, build_type, branch = base_event.ParseBuildName(build)[:3]

428

except base_event.ParseBuildNameException as e:

429

logging.error(str(e))

430

branch = 'Unknown'

431

build_type = 'Unknown'

432

else:

433

embedded_str = re.search(r'x86-\w+-(.*)', _board)

434

if embedded_str:

435

build_type = embedded_str.group(1) + '-' + build_type

data_key_dict = {

'board': board,

'branch': branch,

'build_type': build_type,

441

'suite': suite,

442

}

443

return ('run_suite.%(board)s.%(build_type)s.%(branch)s.%(suite)s'

% data_key_dict)

def SendResultsToStatsd(self, suite, build, board):

448

"""

449

Sends data to statsd.

450

451

1. Makes a data_key of the form: run_suite.$board.$branch.$suite

452

eg: stats/gauges/<hostname>/run_suite/<board>/<branch>/<suite>/

453

2. Computes timings for several start and end event pairs.

454

3. Computes timings for reimage events for all hosts.

455

4. Sends all timing values to statsd.

456

457

@param suite: scheduled suite that we want to record the results of.

458

@param build: the build that this suite ran on.

459

eg: 'lumpy-release/R26-3570.0.0'

460

@param board: the board that this suite ran on.

461

"""

462

if sys.version_info < (2, 7):

463

logging.error('Sending run_suite perf data to statsd requires'

464

'python 2.7 or greater.')

465

return

466

467

data_key = self._GetDataKeyForStatsd(suite, build, board)

468

469

# Since we don't want to try subtracting corrupted datetime values

470

# we catch TypeErrors in _GetDatetime and insert None instead. This

471

# means that even if, say, keyvals.get(constants.ARTIFACT_FINISHED_TIME)

472

# returns a corrupt value the member artifact_end_time is set to None.

473

if self.download_start_time:

474

if self.payload_end_time:

475

stats.Timer(data_key).send('payload_download_time',

476

(self.payload_end_time -

477

self.download_start_time).total_seconds())

478

479

if self.artifact_end_time:

480

stats.Timer(data_key).send('artifact_download_time',

481

(self.artifact_end_time -

482

self.download_start_time).total_seconds())

483

484

if self.tests_end_time:

485

if self.suite_start_time:

486

stats.Timer(data_key).send('suite_run_time',

487

(self.tests_end_time -

488

self.suite_start_time).total_seconds())

489

490

if self.tests_start_time:

491

stats.Timer(data_key).send('tests_run_time',

492

(self.tests_end_time -

493

self.tests_start_time).total_seconds())

494

495

# The reimage times are the start and end times of the reimage

496

# job (taken directly from the view), converted to datetime objects.

497

# If the reimage job never ran we won't enter the loop and if it didn't

498

# finish for some reason the start and end times are still initialized

499

# to valid datetimes.

500

for host, (start, end) in self.reimage_times.iteritems():

501

if start and end:

502

stats.Timer(data_key).send(host.replace('.', '_'),

503

(end - start).total_seconds())

504

505

beeps

2013-04-10 20:56:09 -0700

[diff] [blame]

506

def _full_test_name(job_id, view, build, suite):

507

"""

508

Generates the full test name for printing to logs and generating a link to

509

the results.

Craig Harrison

2012-08-31 10:29:33 -0700

[diff] [blame]

510

511

@param job_id: the job id.

512

@param view: the view for which we are generating the name.

beeps

2013-04-10 20:56:09 -0700

[diff] [blame]

513

@param build: the build for this invocation of run_suite.

514

@param suite: the suite for this invocation of run_suite.

Craig Harrison

2012-08-31 10:29:33 -0700

[diff] [blame]

515

@return The test name, possibly with a descriptive prefix appended.

516

"""

beeps

2013-04-10 20:56:09 -0700

[diff] [blame]

517

experimental, test_name = get_view_info(job_id, view, build, suite)[1:]

518

519

# If an experimental test is aborted get_view_info returns a name which

520

# includes the prefix.

521

prefix = constants.EXPERIMENTAL_PREFIX if (experimental and

522

not test_name.startswith(constants.EXPERIMENTAL_PREFIX)) else ''

523

return prefix + test_name

Craig Harrison

2012-08-31 10:29:33 -0700

[diff] [blame]

524

525

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

526

def main():

Aviv Keshet

2013-03-21 16:38:31 -0700

[diff] [blame]

527

"""

528

Entry point for run_suite script.

529

"""

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

530

parser, options, args = parse_options()

Chris Masone

3a85064

2012-07-11 11:11:18 -0700

[diff] [blame]

531

log_name = 'run_suite-default.log'

Chris Masone

2012-04-11 11:36:48 -0700

[diff] [blame]

532

if not options.mock_job_id:

Zdenek Behan

77290c3

2012-06-26 17:39:47 +0200

[diff] [blame]

533

if args:

534

print 'Unknown arguments: ' + str(args)

535

parser.print_help()

536

return

537

if not options.build:

538

print 'Need to specify which build to use'

539

parser.print_help()

540

return

541

if not options.board:

542

print 'Need to specify board'

parser.print_help()

return

if not options.name:

print 'Need to specify suite name'

Chris Masone

2012-04-11 11:36:48 -0700

[diff] [blame]

547

parser.print_help()

548

return

Chris Masone

3a85064

2012-07-11 11:11:18 -0700

[diff] [blame]

549

# convert build name from containing / to containing only _

550

log_name = 'run_suite-%s.log' % options.build.replace('/', '_')

551

log_dir = os.path.join(common.autotest_dir, 'logs')

552

if os.path.exists(log_dir):

553

log_name = os.path.join(log_dir, log_name)

Alex Miller

8e75d0d

2012-07-31 15:13:32 -0700

[diff] [blame]

554

if options.num is not None and options.num < 1:

555

print 'Number of machines must be more than 0, if specified.'

556

parser.print_help()

557

return

Alex Miller

2012-10-03 12:56:02 -0700

[diff] [blame]

558

if options.no_wait != 'True' and options.no_wait != 'False':

559

print 'Please specify "True" or "False" for --no_wait.'

560

parser.print_help()

561

return

562

if options.file_bugs != 'True' and options.file_bugs != 'False':

563

print 'Please specify "True" or "False" for --file_bugs.'

564

parser.print_help()

565

return

Scott Zawalski

2012-07-02 18:45:07 -0400

[diff] [blame]

566

setup_logging(logfile=log_name)

Chris Masone

dfa0beba

2012-03-19 11:41:47 -0700

[diff] [blame]

567

Simran Basi

2012-12-07 16:37:09 -0800

[diff] [blame]

568

try:

Dan Shi

a02181f

2013-01-29 14:03:32 -0800

[diff] [blame]

569

if not options.bypass_labstatus:

570

utils.check_lab_status(options.board)

Simran Basi

41bfae4

2013-01-09 10:50:47 -0800

[diff] [blame]

571

except (error.LabIsDownException, error.BoardIsDisabledException) as e:

572

logging.debug('Error Message: %s', e)

Simran Basi

2012-12-07 16:37:09 -0800

[diff] [blame]

573

print str(e)

Aviv Keshet

2013-03-21 16:38:31 -0700

[diff] [blame]

574

return RETURN_CODES.WARNING

Simran Basi

2012-12-07 16:37:09 -0800

[diff] [blame]

575

Chris Masone

8ac6671

2012-02-15 14:21:02 -0800

[diff] [blame]

576

afe = frontend_wrappers.RetryingAFE(timeout_min=options.timeout_min,

577

delay_sec=options.delay_sec)

Chris Masone

359c0fd

2012-03-13 15:18:59 -0700

[diff] [blame]

578

Alex Miller

2012-10-03 12:56:02 -0700

[diff] [blame]

579

wait = (options.no_wait == 'False')

580

file_bugs = (options.file_bugs == 'True')

Chris Masone

2012-04-11 11:36:48 -0700

[diff] [blame]

581

if options.mock_job_id:

582

job_id = int(options.mock_job_id)

583

else:

Alex Miller

8e75d0d

2012-07-31 15:13:32 -0700

[diff] [blame]

584

job_id = afe.run('create_suite_job', suite_name=options.name,

585

board=options.board, build=options.build,

Alex Miller

f43d0eb

2012-10-01 13:43:13 -0700

[diff] [blame]

586

check_hosts=wait, pool=options.pool, num=options.num,

587

file_bugs=file_bugs)

Chris Masone

8ac6671

2012-02-15 14:21:02 -0800

[diff] [blame]

588

TKO = frontend_wrappers.RetryingTKO(timeout_min=options.timeout_min,

589

delay_sec=options.delay_sec)

Scott Zawalski

2012-07-02 18:45:07 -0400

[diff] [blame]

590

logging.info('Started suite job: %s', job_id)

Simran Basi

2012-12-07 16:37:09 -0800

[diff] [blame]

591

Aviv Keshet

2013-03-21 16:38:31 -0700

[diff] [blame]

592

code = RETURN_CODES.OK

Chris Masone

359c0fd

2012-03-13 15:18:59 -0700

[diff] [blame]

593

while wait and True:

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

594

if not afe.get_jobs(id=job_id, finished=True):

595

time.sleep(1)

596

continue

Scott Zawalski

2012-03-06 09:21:44 -0500

[diff] [blame]

597

views = TKO.run('get_detailed_test_views', afe_job_id=job_id)

beeps

2013-04-10 20:56:09 -0700

[diff] [blame]

598

width = max((len(_full_test_name(job_id, view, options.build,

599

options.name)) for view in views)) + 3

Scott Zawalski

2012-03-06 09:21:44 -0500

[diff] [blame]

600

Chris Masone

2012-08-29 10:37:08 -0700

[diff] [blame]

601

relevant_views = filter(job_status.view_is_relevant, views)

Scott Zawalski

2012-03-06 09:21:44 -0500

[diff] [blame]

602

if not relevant_views:

Zdenek Behan

2012-04-06 17:20:01 +0200

[diff] [blame]

603

# The main suite job most likely failed in SERVER_JOB.

604

relevant_views = views

Scott Zawalski

2012-03-06 09:21:44 -0500

[diff] [blame]

605

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

606

timings = Timings()

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

607

web_links = []

608

buildbot_links = []

Chris Masone

2012-08-29 10:37:08 -0700

[diff] [blame]

609

for view in relevant_views:

610

timings.RecordTiming(view)

611

if job_status.view_is_for_suite_prep(view):

612

view['test_name'] = 'Suite prep'

Chris Masone

3a85064

2012-07-11 11:11:18 -0700

[diff] [blame]

613

beeps

2013-04-10 20:56:09 -0700

[diff] [blame]

614

job_name, experimental = get_view_info(job_id, view, options.build,

615

options.name)[:2]

616

test_view = _full_test_name(job_id, view, options.build,

617

options.name).ljust(width)

Chris Masone

2012-08-29 10:37:08 -0700

[diff] [blame]

618

logging.info("%s%s", test_view, get_pretty_status(view['status']))

beeps

2013-04-26 19:12:46 -0700

[diff] [blame]

619

620

# It's important that we:

621

# a. Use the test name in the view and not the name returned by

622

# full_test_name, as this was the name inserted after the test

623

# ran. Eg: for an aborted test full_test_name will return

624

# experimental_testname but the view and the bug_id keyval will

625

# contain /bulid/suite/experimental_testname.

626

# b. Apply the inverse function that was applied to record the bug

627

# id as a keyval in dynamic_suite, by replacing all '/' with '_'.

628

bug_id = view['job_keyvals'].get(

629

view['test_name'].replace('/', '_')+constants.BUG_KEYVAL)

630

631

link = LogLink(test_view, job_name, bug_id)

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

632

web_links.append(link)

Chris Masone

2012-08-29 10:37:08 -0700

[diff] [blame]

633

634

if view['status'] != 'GOOD':

635

logging.info("%s %s: %s", test_view, view['status'],

636

view['reason'])

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

637

# Don't show links on the buildbot waterfall for tests with

638

# GOOD status.

639

buildbot_links.append(link)

Chris Masone

8906ab1

2012-07-23 15:37:56 -0700

[diff] [blame]

640

if view['status'] == 'TEST_NA':

641

# Didn't run; nothing to do here!

642

continue

Aviv Keshet

2013-03-21 16:38:31 -0700

[diff] [blame]

643

if code == RETURN_CODES.ERROR:

Zdenek Behan

2012-04-06 17:20:01 +0200

[diff] [blame]

644

# Failed already, no need to worry further.

645

continue

beeps

2013-04-10 20:56:09 -0700

[diff] [blame]

646

647

# Any non experimental test that has a status other than WARN

648

# or GOOD will result in the tree closing. Experimental tests

649

# will not close the tree, even if they have been aborted.

Chris Masone

2012-08-29 10:37:08 -0700

[diff] [blame]

650

if (view['status'] == 'WARN' or

651

(is_fail_status(view['status']) and experimental)):

Zdenek Behan

2012-04-06 17:20:01 +0200

[diff] [blame]

652

# Failures that produce a warning. Either a test with WARN

653

# status or any experimental test failure.

Aviv Keshet

2013-03-21 16:38:31 -0700

[diff] [blame]

654

code = RETURN_CODES.WARNING

Chris Masone

5374c67

2012-03-05 15:11:39 -0800

[diff] [blame]

655

else:

Aviv Keshet

2013-03-21 16:38:31 -0700

[diff] [blame]

656

code = RETURN_CODES.ERROR

657

beeps

2013-03-22 13:15:49 -0700

[diff] [blame]

658

timings.SendResultsToStatsd(options.name, options.build, options.board)

Scott Zawalski

2012-07-02 18:45:07 -0400

[diff] [blame]

659

logging.info(timings)

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

660

logging.info('\n'

661

'Links to test logs:')

662

for link in web_links:

Craig Harrison

2012-08-31 10:29:33 -0700

[diff] [blame]

663

logging.info(link.GenerateTextLink())

Aviv Keshet

2013-03-21 16:38:31 -0700

[diff] [blame]

664

665

try:

666

returnmessage = RETURN_CODES.get_string(code)

667

except ValueError:

668

returnmessage = 'UNKNOWN'

669

logging.info('\n'

670

'Will return from run_suite with status: %s',

671

returnmessage)

672

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

673

logging.info('\n'

674

'Output below this line is for buildbot consumption:')

675

for link in buildbot_links:

676

logging.info(link.GenerateBuildbotLink())

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

677

break

Chris Masone

d5939fe

2012-03-13 10:11:06 -0700

[diff] [blame]

678

else:

Scott Zawalski

2012-07-02 18:45:07 -0400

[diff] [blame]

679

logging.info('Created suite job: %r', job_id)

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

680

link = LogLink(options.name, '%s-%s' % (job_id, getpass.getuser()))

681

logging.info(link.GenerateBuildbotLink())

Scott Zawalski

2012-07-02 18:45:07 -0400

[diff] [blame]

682

logging.info('--no_wait specified; Exiting.')

Chris Masone