Blame - site_utils/run_suite.py - platform/external/autotest

2012-02-14 14:18:01 -0800

[diff] [blame]

#!/usr/bin/python

#

# Use of this source code is governed by a BSD-style license that can be

5

# found in the LICENSE file.

6

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

7

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

8

"""Tool for running suites of tests and waiting for completion.

9

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

10

The desired test suite will be scheduled with autotest. By default,

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

11

this tool will block until the job is complete, printing a summary

12

at the end. Error conditions result in exceptions.

13

14

This is intended for use only with Chrome OS test suits that leverage the

15

dynamic suite infrastructure in server/cros/dynamic_suite.py.

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

16

17

This script exits with one of the following codes:

18

0 - OK: Suite finished successfully

19

1 - ERROR: Test(s) failed, or hits its own timeout

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

20

2 - WARNING: Test(s) raised a warning or passed on retry, none failed/timed out.

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

21

3 - INFRA_FAILURE: Infrastructure related issues, e.g.

22

* Lab is down

23

* Too many duts (defined as a constant) in repair failed status

24

* Suite job issues, like bug in dynamic suite,

25

user aborted the suite, lose a drone/all devservers/rpc server,

26

0 tests ran, etc.

27

4 - SUITE_TIMEOUT: Suite timed out, some tests ran,

28

none failed by the time the suite job was aborted. This will cover,

29

but not limited to, the following cases:

30

* A devserver failure that manifests as a timeout

31

* No DUTs available midway through a suite

32

* Provision/Reset/Cleanup took longer time than expected for new image

33

* A regression in scheduler tick time.

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

34

"""

35

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

36

Prashanth B

2014-03-14 12:36:29 -0700

[diff] [blame]

37

import datetime as datetime_base

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

38

import getpass, logging, optparse, os, re, sys, time

Chris Masone

2012-09-06 16:00:07 -0700

[diff] [blame]

39

from datetime import datetime

40

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

41

import common

Chris Masone

2012-09-06 16:00:07 -0700

[diff] [blame]

42

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

43

from autotest_lib.client.common_lib import error

J. Richard Barnette

3cbd76b

2013-11-27 12:11:25 -0800

[diff] [blame]

44

from autotest_lib.client.common_lib import global_config, enum

45

from autotest_lib.client.common_lib import priorities

Michael Liang

da8c60a

2014-06-03 13:24:51 -0700

[diff] [blame]

46

from autotest_lib.client.common_lib.cros.graphite import stats

Prashanth B

2014-03-14 12:36:29 -0700

[diff] [blame]

47

from autotest_lib.frontend.afe.json_rpc import proxy

J. Richard Barnette

3cbd76b

2013-11-27 12:11:25 -0800

[diff] [blame]

48

from autotest_lib.server import utils

Chris Masone

44e4d6c

2012-08-15 14:25:53 -0700

[diff] [blame]

49

from autotest_lib.server.cros.dynamic_suite import constants

Chris Masone

b493555

2012-08-14 12:05:54 -0700

[diff] [blame]

50

from autotest_lib.server.cros.dynamic_suite import frontend_wrappers

51

from autotest_lib.server.cros.dynamic_suite import job_status

Prashanth B

2014-03-14 12:36:29 -0700

[diff] [blame]

52

from autotest_lib.server.cros.dynamic_suite import reporting_utils

J. Richard Barnette

e7b98bb

2013-08-21 16:34:16 -0700

[diff] [blame]

53

from autotest_lib.server.cros.dynamic_suite import tools

Prashanth B

2014-03-14 12:36:29 -0700

[diff] [blame]

54

from autotest_lib.site_utils import diagnosis_utils

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

55

Chris Masone

1120cdf

2012-02-27 17:35:07 -0800

[diff] [blame]

56

CONFIG = global_config.global_config

57

Simran Basi

2012-12-07 16:37:09 -0800

[diff] [blame]

58

# Return code that will be sent back to autotest_rpc_server.py

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

59

RETURN_CODES = enum.Enum(

60

'OK', 'ERROR', 'WARNING', 'INFRA_FAILURE', 'SUITE_TIMEOUT')

61

# The severity of return code. If multiple codes

62

# apply, the script should always return the severest one.

63

# E.g. if we have a test failure and the suite also timed out,

64

# we should return 'ERROR'.

65

SEVERITY = {RETURN_CODES.OK: 0,

66

RETURN_CODES.WARNING: 1,

67

RETURN_CODES.INFRA_FAILURE: 2,

68

RETURN_CODES.SUITE_TIMEOUT: 3,

69

RETURN_CODES.ERROR: 4}

70

71

72

def get_worse_code(code1, code2):

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

73

"""Compare the severity of two codes and return the worse code.

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

74

75

@param code1: An enum value of RETURN_CODES

76

@param code2: An enum value of RETURN_CODES

77

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

78

@returns: the more severe one between code1 and code2.

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

79

80

"""

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

81

return code1 if SEVERITY[code1] >= SEVERITY[code2] else code2

Simran Basi

2012-12-07 16:37:09 -0800

[diff] [blame]

82

Chris Masone

dfa0beba

2012-03-19 11:41:47 -0700

[diff] [blame]

83

Scott Zawalski

2012-07-02 18:45:07 -0400

[diff] [blame]

84

def setup_logging(logfile=None):

85

"""Setup basic logging with all logging info stripped.

86

87

Calls to logging will only show the message. No severity is logged.

88

89

@param logfile: If specified dump output to a file as well.

90

"""

Dan Shi

8a2dddd

2013-08-09 12:58:44 -0700

[diff] [blame]

91

# Remove all existing handlers. client/common_lib/logging_config adds

92

# a StreamHandler to logger when modules are imported, e.g.,

93

# autotest_lib.client.bin.utils. A new StreamHandler will be added here to

94

# log only messages, not severity.

95

logging.getLogger().handlers = []

96

Scott Zawalski

2012-07-02 18:45:07 -0400

[diff] [blame]

97

screen_handler = logging.StreamHandler()

98

screen_handler.setFormatter(logging.Formatter('%(message)s'))

99

logging.getLogger().addHandler(screen_handler)

100

logging.getLogger().setLevel(logging.INFO)

101

if logfile:

102

file_handler = logging.FileHandler(logfile)

103

file_handler.setLevel(logging.DEBUG)

104

logging.getLogger().addHandler(file_handler)

Chris Masone

dfa0beba

2012-03-19 11:41:47 -0700

[diff] [blame]

105

106

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

107

def parse_options():

Aviv Keshet

2013-03-21 16:38:31 -0700

[diff] [blame]

108

#pylint: disable-msg=C0111

Zdenek Behan

77290c3

2012-06-26 17:39:47 +0200

[diff] [blame]

109

usage = "usage: %prog [options]"

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

110

parser = optparse.OptionParser(usage=usage)

111

parser.add_option("-b", "--board", dest="board")

112

parser.add_option("-i", "--build", dest="build")

Chris Masone

359c0fd

2012-03-13 15:18:59 -0700

[diff] [blame]

113

# This should just be a boolean flag, but the autotest "proxy" code

114

# can't handle flags that don't take arguments.

Alex Miller

ab33ddb

2012-10-03 12:56:02 -0700

[diff] [blame]

115

parser.add_option("-n", "--no_wait", dest="no_wait", default="False",

116

help='Must pass "True" or "False" if used.')

Alex Miller

0032e93

2013-10-23 12:52:58 -0700

[diff] [blame]

117

# If you really want no pool, --pool="" will do it. USE WITH CARE.

118

parser.add_option("-p", "--pool", dest="pool", default="suites")

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

119

parser.add_option("-s", "--suite_name", dest="name")

Simran Basi

25effe3

2013-11-26 13:02:11 -0800

[diff] [blame]

120

parser.add_option("-a", "--afe_timeout_mins", dest="afe_timeout_mins",

121

default=30)

122

parser.add_option("-t", "--timeout_mins", dest="timeout_mins",

123

default=1440)

Chris Masone

8ac6671

2012-02-15 14:21:02 -0800

[diff] [blame]

124

parser.add_option("-d", "--delay_sec", dest="delay_sec", default=10)

Chris Masone

986459e

2012-04-11 11:36:48 -0700

[diff] [blame]

125

parser.add_option("-m", "--mock_job_id", dest="mock_job_id",

126

help="Skips running suite; creates report for given ID.")

Alex Miller

05a2fff

2012-09-10 10:14:34 -0700

[diff] [blame]

127

parser.add_option("-u", "--num", dest="num", type="int", default=None,

Chris Masone

8906ab1

2012-07-23 15:37:56 -0700

[diff] [blame]

128

help="Run on at most NUM machines.")

Alex Miller

f43d0eb

2012-10-01 13:43:13 -0700

[diff] [blame]

129

# Same boolean flag issue applies here.

Alex Miller

ab33ddb

2012-10-03 12:56:02 -0700

[diff] [blame]

130

parser.add_option("-f", "--file_bugs", dest="file_bugs", default='False',

131

help='File bugs on test failures. Must pass "True" or '

132

'"False" if used.')

Dan Shi

a02181f

2013-01-29 14:03:32 -0800

[diff] [blame]

133

parser.add_option("-l", "--bypass_labstatus", dest="bypass_labstatus",

134

action="store_true", help='Bypass lab status check.')

Alex Miller

88762a8

2013-09-04 15:41:28 -0700

[diff] [blame]

135

# We allow either a number or a string for the priority. This way, if you

136

# know what you're doing, one can specify a custom priority level between

137

# other levels.

138

parser.add_option("-r", "--priority", dest="priority",

139

default=priorities.Priority.DEFAULT,

140

action="store", help="Priority of suite")

Fang Deng

2014-05-15 15:41:50 -0700

[diff] [blame]

141

parser.add_option('--retry', dest='retry', default='False',

142

action='store', help='Enable test retry. '

143

'Must pass "True" or "False" if used.')

Dan Shi

8de6d1b

2014-06-12 09:10:37 -0700

[diff] [blame]

144

parser.add_option('--minimum_duts', dest='minimum_duts', type=int,

145

default=0, action='store',

146

help='Minimum Number of available machines required to '

147

'run the suite. Default is set to 0, which means do not '

148

'force the check of available machines before running '

149

'the suite.')

Aviv Keshet

7cd1231

2013-07-25 10:25:55 -0700

[diff] [blame]

150

parser.add_option("--suite_args", dest="suite_args",

151

default=None, action="store",

152

help="Argument string for suite control file.")

Alex Miller

f43d0eb

2012-10-01 13:43:13 -0700

[diff] [blame]

153

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

154

options, args = parser.parse_args()

155

return parser, options, args

156

157

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

158

def verify_options_and_args(options, args):

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

159

"""Verify the validity of options and args.

160

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

161

@param options: The parsed options to verify.

162

@param args: The parsed args to verify.

163

164

@returns: True if verification passes, False otherwise.

165

166

"""

167

if not options.mock_job_id:

168

if args:

169

print 'Unknown arguments: ' + str(args)

170

return False

171

if not options.build:

172

print 'Need to specify which build to use'

173

return False

174

if not options.board:

175

print 'Need to specify board'

176

return False

177

if not options.name:

178

print 'Need to specify suite name'

179

return False

180

if options.num is not None and options.num < 1:

181

print 'Number of machines must be more than 0, if specified.'

182

return False

183

if options.no_wait != 'True' and options.no_wait != 'False':

184

print 'Please specify "True" or "False" for --no_wait.'

185

return False

186

if options.file_bugs != 'True' and options.file_bugs != 'False':

187

print 'Please specify "True" or "False" for --file_bugs.'

188

return False

Fang Deng

2014-05-15 15:41:50 -0700

[diff] [blame]

189

if options.retry != 'True' and options.retry != 'False':

190

print 'Please specify "True" or "False" for --retry'

191

return False

192

if options.no_wait == 'True' and options.retry == 'True':

193

print 'Test retry is not available when using --no_wait=True'

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

return True

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

197

def get_pretty_status(status):

Aviv Keshet

2013-03-21 16:38:31 -0700

[diff] [blame]

198

"""

199

Converts a status string into a pretty-for-printing string.

200

201

@param status: Status to convert.

202

203

@return: Returns pretty string.

GOOD -> [ PASSED ]

TEST_NA -> [ INFO ]

other -> [ FAILED ]

"""

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

208

if status == 'GOOD':

209

return '[ PASSED ]'

Chris Masone

8906ab1

2012-07-23 15:37:56 -0700

[diff] [blame]

210

elif status == 'TEST_NA':

211

return '[ INFO ]'

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

212

return '[ FAILED ]'

213

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

214

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

215

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

216

class LogLink(object):

J. Richard Barnette

2013-08-23 11:24:21 -0700

[diff] [blame]

217

"""Information needed to record a link in the logs.

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

218

J. Richard Barnette

2013-08-23 11:24:21 -0700

[diff] [blame]

219

Depending on context and the information provided at

220

construction time, the link may point to either to log files for

221

a job, or to a bug filed for a failure in the job.

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

222

J. Richard Barnette

2013-08-23 11:24:21 -0700

[diff] [blame]

223

@var anchor The link text.

224

@var url The link url.

225

@var bug_id Id of a bug to link to, or None.

226

"""

227

228

_BUG_URL_PREFIX = CONFIG.get_config_value('BUG_REPORTING',

229

'tracker_url')

J. Richard Barnette

2013-08-23 11:24:21 -0700

[diff] [blame]

230

_URL_PATTERN = CONFIG.get_config_value('CROS',

231

'log_url_pattern', type=str)

232

233

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

234

def __init__(self, anchor, server, job_string, bug_info=None, reason=None,

235

retry_count=0):

J. Richard Barnette

2013-08-23 11:24:21 -0700

[diff] [blame]

236

"""Initialize the LogLink by generating the log URL.

237

238

@param anchor The link text.

Alex Miller

2013-10-30 15:18:57 -0700

[diff] [blame]

239

@param server The hostname of the server this suite ran on.

J. Richard Barnette

2013-08-23 11:24:21 -0700

[diff] [blame]

240

@param job_string The job whose logs we'd like to link to.

241

@param bug_info Info about the bug, if one was filed.

Fang Deng

2014-02-24 17:51:24 -0800

[diff] [blame]

242

@param reason A string representing the reason of failure if any.

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

243

@param retry_count How many times the test has been retried.

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

244

"""

245

self.anchor = anchor

Alex Miller

2013-10-30 15:18:57 -0700

[diff] [blame]

246

self.url = self._URL_PATTERN % (server, job_string)

Fang Deng

2014-02-24 17:51:24 -0800

[diff] [blame]

247

self.reason = reason

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

248

self.retry_count = retry_count

J. Richard Barnette

2013-08-23 11:24:21 -0700

[diff] [blame]

249

if bug_info:

250

self.bug_id, self.bug_count = bug_info

251

else:

252

self.bug_id = None

253

self.bug_count = None

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

254

255

256

def GenerateBuildbotLink(self):

J. Richard Barnette

2013-08-23 11:24:21 -0700

[diff] [blame]

257

"""Generate a link formatted to meet buildbot expectations.

258

259

If there is a bug associated with this link, report that;

260

otherwise report a link to the job logs.

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

261

262

@return A link formatted for the buildbot log annotator.

263

"""

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

264

info_strings = []

265

if self.retry_count > 0:

266

info_strings.append('retry_count: %d' % self.retry_count)

267

J. Richard Barnette

2013-08-23 11:24:21 -0700

[diff] [blame]

268

if self.bug_id:

269

url = '%s%s' % (self._BUG_URL_PREFIX, self.bug_id)

beeps

ad4daf8

2013-09-26 10:07:33 -0700

[diff] [blame]

270

if self.bug_count is None:

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

271

bug_info = 'unknown number of reports'

beeps

ad4daf8

2013-09-26 10:07:33 -0700

[diff] [blame]

272

elif self.bug_count == 1:

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

273

bug_info = 'new report'

J. Richard Barnette

2013-08-23 11:24:21 -0700

[diff] [blame]

274

else:

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

275

bug_info = '%s reports' % self.bug_count

276

info_strings.append(bug_info)

J. Richard Barnette

2013-08-23 11:24:21 -0700

[diff] [blame]

277

else:

278

url = self.url

Fang Deng

2014-02-24 17:51:24 -0800

[diff] [blame]

279

280

if self.reason:

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

281

info_strings.append(self.reason.strip())

282

283

if info_strings:

284

info = ', '.join(info_strings)

285

anchor_text = '%(anchor)s: %(info)s' % {

286

'anchor': self.anchor.strip(), 'info': info}

287

else:

288

anchor_text = self.anchor.strip()

Fang Deng

2014-02-24 17:51:24 -0800

[diff] [blame]

289

290

return '@@@STEP_LINK@%s@%s@@@'% (anchor_text, url)

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

291

292

Craig Harrison

d845157

2012-08-31 10:29:33 -0700

[diff] [blame]

293

def GenerateTextLink(self):

J. Richard Barnette

2013-08-23 11:24:21 -0700

[diff] [blame]

294

"""Generate a link to the job's logs, for consumption by a human.

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

295

Craig Harrison

d845157

2012-08-31 10:29:33 -0700

[diff] [blame]

296

@return A link formatted for human readability.

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

297

"""

Fang Deng

2014-02-24 17:51:24 -0800

[diff] [blame]

298

return '%s%s' % (self.anchor, self.url)

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

299

300

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

301

class Timings(object):

302

"""Timings for important events during a suite.

303

304

All timestamps are datetime.datetime objects.

305

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

306

@var suite_job_id: the afe job id of the suite job for which

307

we are recording the timing for.

308

@var download_start_time: the time the devserver starts staging

309

the build artifacts. Recorded in create_suite_job.

310

@var payload_end_time: the time when the artifacts only necessary to start

311

installsing images onto DUT's are staged.

312

Recorded in create_suite_job.

313

@var artifact_end_time: the remaining artifacts are downloaded after we kick

314

off the reimaging job, at which point we record

315

artifact_end_time. Recorded in dynamic_suite.py.

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

316

@var suite_start_time: the time the suite started.

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

317

@var tests_start_time: the time the first test started running.

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

318

@var tests_end_time: the time the last test finished running.

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

319

"""

beeps

2013-03-22 13:15:49 -0700

[diff] [blame]

320

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

321

def __init__(self, suite_job_id):

322

self.suite_job_id = suite_job_id

323

# Timings related to staging artifacts on devserver.

324

self.download_start_time = None

325

self.payload_end_time = None

326

self.artifact_end_time = None

beeps

2013-03-22 13:15:49 -0700

[diff] [blame]

327

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

328

# The test_start_time, but taken off the view that corresponds to the

329

# suite instead of an individual test.

330

self.suite_start_time = None

beeps

2013-03-22 13:15:49 -0700

[diff] [blame]

331

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

332

# Earliest and Latest tests in the set of TestViews passed to us.

333

self.tests_start_time = None

334

self.tests_end_time = None

335

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

336

337

beeps

2013-03-22 13:15:49 -0700

[diff] [blame]

338

def _GetDatetime(self, timing_string, timing_string_format):

339

"""

340

Formats the timing_string according to the timing_string_format.

341

342

@param timing_string: A datetime timing string.

343

@param timing_string_format: Format of the time in timing_string.

344

@return: A datetime object for the given timing string.

345

"""

346

try:

347

return datetime.strptime(timing_string, timing_string_format)

except TypeError:

return None

Chris Masone

2012-08-29 10:37:08 -0700

[diff] [blame]

352

def RecordTiming(self, view):

353

"""Given a test report view, extract and record pertinent time info.

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

354

355

get_detailed_test_views() returns a list of entries that provide

356

info about the various parts of a suite run. This method can take

357

any one of these entries and look up timestamp info we might want

358

and record it.

359

Chris Masone

2012-09-06 16:00:07 -0700

[diff] [blame]

360

If timestamps are unavailable, datetime.datetime.min/max will be used.

361

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

362

@param view: A TestView object.

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

363

"""

Chris Masone

2012-09-06 16:00:07 -0700

[diff] [blame]

364

start_candidate = datetime.min

365

end_candidate = datetime.max

366

if view['test_started_time']:

367

start_candidate = datetime.strptime(view['test_started_time'],

368

job_status.TIME_FMT)

369

if view['test_finished_time']:

370

end_candidate = datetime.strptime(view['test_finished_time'],

371

job_status.TIME_FMT)

372

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

373

if view.get_testname() == TestView.SUITE_PREP:

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

374

self.suite_start_time = start_candidate

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

375

else:

376

self._UpdateFirstTestStartTime(start_candidate)

377

self._UpdateLastTestEndTime(end_candidate)

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

378

if view['afe_job_id'] == self.suite_job_id and 'job_keyvals' in view:

Chris Masone

d9f13c5

2012-08-29 10:37:08 -0700

[diff] [blame]

379

keyvals = view['job_keyvals']

beeps

2013-03-22 13:15:49 -0700

[diff] [blame]

380

self.download_start_time = self._GetDatetime(

381

keyvals.get(constants.DOWNLOAD_STARTED_TIME),

382

job_status.TIME_FMT)

383

384

self.payload_end_time = self._GetDatetime(

385

keyvals.get(constants.PAYLOAD_FINISHED_TIME),

386

job_status.TIME_FMT)

387

388

self.artifact_end_time = self._GetDatetime(

389

keyvals.get(constants.ARTIFACT_FINISHED_TIME),

390

job_status.TIME_FMT)

Chris Masone

44e4d6c

2012-08-15 14:25:53 -0700

[diff] [blame]

391

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

392

393

def _UpdateFirstTestStartTime(self, candidate):

394

"""Update self.tests_start_time, iff candidate is an earlier time.

395

396

@param candidate: a datetime.datetime object.

397

"""

398

if not self.tests_start_time or candidate < self.tests_start_time:

399

self.tests_start_time = candidate

400

401

402

def _UpdateLastTestEndTime(self, candidate):

403

"""Update self.tests_end_time, iff candidate is a later time.

404

405

@param candidate: a datetime.datetime object.

406

"""

407

if not self.tests_end_time or candidate > self.tests_end_time:

408

self.tests_end_time = candidate

def __str__(self):

return ('\n'

'Suite timings:\n'

Chris Masone

2012-05-01 16:52:31 -0700

[diff] [blame]

414

'Downloads started at %s\n'

415

'Payload downloads ended at %s\n'

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

416

'Suite started at %s\n'

Chris Masone

2012-05-01 16:52:31 -0700

[diff] [blame]

417

'Artifact downloads ended (at latest) at %s\n'

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

418

'Testing started at %s\n'

Chris Masone

2012-05-01 16:52:31 -0700

[diff] [blame]

419

'Testing ended at %s\n' % (self.download_start_time,

420

self.payload_end_time,

421

self.suite_start_time,

Chris Masone

2012-05-01 16:52:31 -0700

[diff] [blame]

422

self.artifact_end_time,

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

423

self.tests_start_time,

424

self.tests_end_time))

425

426

beeps

2013-03-22 13:15:49 -0700

[diff] [blame]

427

def _GetDataKeyForStatsd(self, suite, build, board):

428

"""

429

Constructs the key used for logging statsd timing data.

430

431

@param suite: scheduled suite that we want to record the results of.

432

@param build: The build string. This string should have a consistent

433

format eg: x86-mario-release/R26-3570.0.0. If the format of this

434

string changes such that we can't determine build_type or branch

435

we give up and use the parametes we're sure of instead (suite,

436

board). eg:

437

1. build = x86-alex-pgo-release/R26-3570.0.0

438

branch = 26

439

build_type = pgo-release

440

2. build = lumpy-paladin/R28-3993.0.0-rc5

441

branch = 28

442

build_type = paladin

443

@param board: The board that this suite ran on.

444

@return: The key used to log timing information in statsd.

445

"""

446

try:

J. Richard Barnette

3cbd76b

2013-11-27 12:11:25 -0800

[diff] [blame]

447

_board, build_type, branch = utils.ParseBuildName(build)[:3]

448

except utils.ParseBuildNameException as e:

beeps

2013-03-22 13:15:49 -0700

[diff] [blame]

449

logging.error(str(e))

450

branch = 'Unknown'

451

build_type = 'Unknown'

452

else:

453

embedded_str = re.search(r'x86-\w+-(.*)', _board)

454

if embedded_str:

455

build_type = embedded_str.group(1) + '-' + build_type

data_key_dict = {

'board': board,

'branch': branch,

'build_type': build_type,

461

'suite': suite,

462

}

463

return ('run_suite.%(board)s.%(build_type)s.%(branch)s.%(suite)s'

% data_key_dict)

def SendResultsToStatsd(self, suite, build, board):

468

"""

469

Sends data to statsd.

470

471

1. Makes a data_key of the form: run_suite.$board.$branch.$suite

472

eg: stats/gauges/<hostname>/run_suite/<board>/<branch>/<suite>/

473

2. Computes timings for several start and end event pairs.

Alex Miller

9a1987a

2013-08-21 15:51:16 -0700

[diff] [blame]

474

3. Sends all timing values to statsd.

beeps

2013-03-22 13:15:49 -0700

[diff] [blame]

475

476

@param suite: scheduled suite that we want to record the results of.

477

@param build: the build that this suite ran on.

478

eg: 'lumpy-release/R26-3570.0.0'

479

@param board: the board that this suite ran on.

480

"""

481

if sys.version_info < (2, 7):

482

logging.error('Sending run_suite perf data to statsd requires'

483

'python 2.7 or greater.')

484

return

485

486

data_key = self._GetDataKeyForStatsd(suite, build, board)

487

488

# Since we don't want to try subtracting corrupted datetime values

489

# we catch TypeErrors in _GetDatetime and insert None instead. This

490

# means that even if, say, keyvals.get(constants.ARTIFACT_FINISHED_TIME)

491

# returns a corrupt value the member artifact_end_time is set to None.

492

if self.download_start_time:

493

if self.payload_end_time:

494

stats.Timer(data_key).send('payload_download_time',

495

(self.payload_end_time -

496

self.download_start_time).total_seconds())

497

498

if self.artifact_end_time:

499

stats.Timer(data_key).send('artifact_download_time',

500

(self.artifact_end_time -

501

self.download_start_time).total_seconds())

502

503

if self.tests_end_time:

504

if self.suite_start_time:

505

stats.Timer(data_key).send('suite_run_time',

506

(self.tests_end_time -

507

self.suite_start_time).total_seconds())

508

509

if self.tests_start_time:

510

stats.Timer(data_key).send('tests_run_time',

511

(self.tests_end_time -

512

self.tests_start_time).total_seconds())

513

beeps

2013-03-22 13:15:49 -0700

[diff] [blame]

514

Alex Miller

2013-10-30 15:18:57 -0700

[diff] [blame]

515

_DEFAULT_AUTOTEST_INSTANCE = CONFIG.get_config_value(

516

'SERVER', 'hostname', type=str)

517

518

519

def instance_for_pool(pool_name):

520

"""

521

Return the hostname of the server that should be used to service a suite

522

for the specified pool.

523

524

@param pool_name: The pool (without 'pool:' to schedule the suite against.

525

@return: The correct host that should be used to service this suite run.

526

"""

527

return CONFIG.get_config_value(

528

'POOL_INSTANCE_SHARDING', pool_name,

529

default=_DEFAULT_AUTOTEST_INSTANCE)

530

531

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

532

class TestView(object):

533

"""Represents a test view and provides a set of helper functions."""

534

535

536

SUITE_PREP = 'Suite prep'

537

538

539

def __init__(self, view, suite_job_id, suite_name, build):

540

"""Init a TestView object representing a tko test view.

541

542

@param view: A dictionary representing a tko test view.

543

@param suite_job_id: The id of the suite job

544

that kicks off the test.

545

@param suite_name: The name of the suite

546

that the test belongs to.

547

@param build: The build for which the test is run.

548

"""

549

self.view = view

550

self.suite_job_id = suite_job_id

551

self.suite_name = suite_name

552

self.build = build

553

self.is_suite_view = suite_job_id == view['afe_job_id']

554

# This is the test name that will be shown in the output.

self.testname = None

def __getitem__(self, key):

559

"""Overload __getitem__ so that we can still use []

560

561

@param key: A key of the tko test view.

562

563

@returns: The value of an attribute in the view.

564

565

"""

566

return self.view[key]

567

568

569

def __setitem__(self, key, item):

570

"""Overload __setitem so that we can still use []

571

572

@param key: A key of the tko test view.

573

@param item: A value to set

574

575

"""

576

self.view[key] = item

def __iter__(self):

"""Overload __iter__ so that it supports 'in' operator."""

581

return iter(self.view)

582

583

584

def get_testname(self):

585

"""Get test name that should be shown in the output.

586

587

Formalize the test_name we got from the test view.

588

589

Remove 'build/suite' prefix if any. And append 'experimental' prefix

590

for experimental tests if their names do not start with 'experimental'.

591

592

If one runs a test in control file via the following code,

593

job.runtest('my_Test', tag='tag')

594

for most of the cases, view['test_name'] would look like 'my_Test.tag'.

595

If this is the case, this method will just return the original

596

test name, i.e. 'my_Test.tag'.

597

598

There are four special cases.

599

1) A test view is for the suite job's SERVER_JOB.

600

In this case, this method will return 'Suite prep'.

601

602

2) A test view is of a child job and for a SERVER_JOB or CLIENT_JOB.

603

In this case, we will take the job name, remove the build/suite

604

prefix from the job name, and append the rest to 'SERVER_JOB'

605

or 'CLIENT_JOB' as a prefix. So the names returned by this

606

method will look like:

607

'experimental_Telemetry Smoothness Measurement_SERVER_JOB'

608

'experimental_dummy_Pass_SERVER_JOB'

609

'dummy_Fail_SERVER_JOB'

610

611

2) A test view is of a suite job and its status is ABORT.

612

In this case, the view['test_name'] is the child job's name.

613

If it is an experimental test, 'experimental' will be part

614

of the name. For instance,

615

'lumpy-release/R35-5712.0.0/perf_v2/

616

experimental_Telemetry Smoothness Measurement'

617

'lumpy-release/R35-5712.0.0/dummy/experimental_dummy_Pass'

618

'lumpy-release/R35-5712.0.0/dummy/dummy_Fail'

619

The above names will be converted to the following:

620

'experimental_Telemetry Smoothness Measurement'

621

'experimental_dummy_Pass'

622

'dummy_Fail'

623

624

3) A test view's status is of a suite job and its status is TEST_NA.

625

In this case, the view['test_name'] is the NAME field of the control

626

file. If it is an experimental test, 'experimental' will part of

627

the name. For instance,

628

'experimental_Telemetry Smoothness Measurement'

629

'experimental_dummy_Pass'

630

'dummy_Fail'

631

This method will not modify these names.

632

633

@returns: Test name after normalization.

634

635

"""

636

if self.testname is not None:

637

return self.testname

638

639

if (self.is_suite_view and

640

self.view['test_name'].startswith('SERVER_JOB')):

641

# Rename suite job's SERVER_JOB to 'Suite prep'.

642

self.testname = self.SUITE_PREP

643

return self.testname

644

645

if (self.view['test_name'].startswith('SERVER_JOB') or

646

self.view['test_name'].startswith('CLIENT_JOB')):

647

# Append job name as a prefix for SERVER_JOB and CLIENT_JOB

648

testname= '%s_%s' % (self.view['job_name'], self.view['test_name'])

649

else:

650

testname = self.view['test_name']

651

experimental = self.is_experimental()

652

# Remove the build and suite name from testname if any.

653

testname = tools.get_test_name(

654

self.build, self.suite_name, testname)

655

# If an experimental test was aborted, testname

656

# would include the 'experimental' prefix already.

657

prefix = constants.EXPERIMENTAL_PREFIX if (

658

experimental and not

659

testname.startswith(constants.EXPERIMENTAL_PREFIX)) else ''

660

self.testname = prefix + testname

return self.testname

def is_relevant_suite_view(self):

665

"""Checks whether this is a suite view we should care about.

666

667

@returns: True if it is relevant. False otherwise.

668

"""

669

return (self.get_testname() == self.SUITE_PREP or

670

(self.is_suite_view and

671

not self.view['test_name'].startswith('CLIENT_JOB') and

672

not self.view['subdir']))

def is_test(self):

"""Return whether the view is for an actual test.

677

678

@returns True if the view is for an actual test.

679

False if the view is for SERVER_JOB or CLIENT_JOB.

680

681

"""

682

return not (self.view['test_name'].startswith('SERVER_JOB') or

683

self.view['test_name'].startswith('CLIENT_JOB'))

def is_retry(self):

"""Check whether the view is for a retry.

688

689

@returns: True, if the view is for a retry; False otherwise.

690

691

"""

692

return self.view['job_keyvals'].get('retry_original_job_id') is not None

693

694

695

def is_experimental(self):

696

"""Check whether a test view is for an experimental test.

697

698

@returns: True if it is for an experimental test, False otherwise.

699

700

"""

701

return (self.view['job_keyvals'].get('experimental') == 'True' or

702

tools.get_test_name(self.build, self.suite_name,

703

self.view['test_name']).startswith('experimental'))

704

705

706

def is_timeout_and_not_run(self):

707

"""Check whether the view is for a test that timed out and did not run.

708

709

A child job that was aborted before it got a chance to

710

run will have a view assoicated to suite job.

711

This mostly happens when a job timed out due to

712

starving for hosts (unless aborted manually by user,

713

which we don't consider here).

714

715

@returns: True if the test represented by the view

716

timed out and did not run; False otherwise.

717

718

"""

719

return (self.is_suite_view and self.view['status'] == 'ABORT')

720

721

722

def is_in_fail_status(self):

723

"""

724

Check if the given test's status corresponds to a failure.

725

726

@returns: True if the test's status is FAIL or ERROR. False otherwise.

727

728

"""

729

# All the statuses tests can have when they fail.

730

return self.view['status'] in ['FAIL', 'ERROR', 'ABORT']

731

732

733

def get_buildbot_link_reason(self):

734

"""Generate the buildbot link reason for the test.

735

736

@returns: A string representing the reason.

737

738

"""

739

return ('%s: %s' % (self.view['status'], self.view['reason'])

740

if self.view['reason'] else self.view['status'])

741

742

743

def get_job_id_owner_str(self):

744

"""Generate the job_id_owner string for a test.

745

746

@returns: A string which looks like 135036-username

747

748

"""

749

return '%s-%s' % (self.view['afe_job_id'], getpass.getuser())

750

751

752

def get_bug_info(self, suite_job_keyvals):

753

"""Get the bug info from suite_job_keyvals.

754

755

If a bug has been filed for the test, its bug info (bug id and counts)

756

will be stored in the suite job's keyvals. This method attempts to

757

retrieve bug info of the test from |suite_job_keyvals|. It will return

758

None if no bug info is found. No need to check bug info if the view is

759

SUITE_PREP.

760

761

@param suite_job_keyvals: The job keyval dictionary of the suite job.

762

All the bug info about child jobs are stored in

763

suite job's keyvals.

764

765

@returns: None if there is no bug info, or a pair with the

766

id of the bug, and the count of the number of

767

times the bug has been seen.

768

769

"""

770

if self.get_testname() == self.SUITE_PREP:

771

return None

772

if (self.view['test_name'].startswith('SERVER_JOB') or

773

self.view['test_name'].startswith('CLIENT_JOB')):

774

# Append job name as a prefix for SERVER_JOB and CLIENT_JOB

775

testname= '%s_%s' % (self.view['job_name'], self.view['test_name'])

776

else:

777

testname = self.view['test_name']

778

779

return tools.get_test_failure_bug_info(

780

suite_job_keyvals, self.view['afe_job_id'],

testname)

def should_display_buildbot_link(self):

785

"""Check whether a buildbot link should show for this view.

786

787

Show links on the buildbot waterfall if

788

1) it is a normal test that are not in GOOD/TEST_NA status

789

and did not time out before running; or

790

2) this is a retry.

791

792

@returns: True if we should show the buildbot link.

793

False otherwise.

794

"""

795

is_bad_status = (self.view['status'] != 'GOOD' and

796

self.view['status'] != 'TEST_NA')

797

is_timeout = self.is_timeout_and_not_run()

798

return (is_bad_status and not is_timeout) or self.is_retry()

799

800

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

801

class ResultCollector(object):

802

"""Collect test results of a suite.

803

804

Once a suite job has finished, use this class to collect test results.

805

`run` is the core method that is to be called first. Then the caller

806

could retrieve information like return code, return message, is_aborted,

807

and timings by accessing the collector's public attributes. And output

808

the test results and links by calling the 'output_*' methods.

809

810

Here is a overview of what `run` method does.

811

812

1) Collect the suite job's results from tko_test_view_2.

813

For the suite job, we only pull test views without a 'subdir'.

814

A NULL subdir indicates that the test was _not_ executed. This could be

815

that no child job was scheduled for this test or the child job got

816

aborted before starts running.

817

(Note 'SERVER_JOB'/'CLIENT_JOB' are handled specially)

818

819

2) Collect the child jobs' results from tko_test_view_2.

820

For child jobs, we pull all the test views associated with them.

821

(Note 'SERVER_JOB'/'CLIENT_JOB' are handled speically)

822

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

823

3) Generate web and buildbot links.

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

824

4) Compute timings of the suite run.

825

5) Compute the return code based on test results.

826

827

@var _instance_server: The hostname of the server that is used

828

to service the suite.

829

@var _afe: The afe rpc client.

830

@var _tko: The tko rpc client.

831

@var _build: The build for which the suite is run,

832

e.g. 'lumpy-release/R35-5712.0.0'

833

@var _suite_name: The suite name, e.g. 'bvt', 'dummy'.

834

@var _suite_job_id: The job id of the suite for which we are going to

835

collect results.

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

836

@var _suite_views: A list of TestView objects, representing relevant

837

test views of the suite job.

838

@var _child_views: A list of TestView objects, representing test views

839

of the child jobs.

840

@var _test_views: A list of TestView objects, representing all test views

841

from _suite_views and _child_views.

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

842

@var _web_links: A list of web links pointing to the results of jobs.

843

@var _buildbot_links: A list of buildbot links for non-passing tests.

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

844

@var _max_testname_width: Max width of all test names.

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

845

@var return_code: The exit code that should be returned by run_suite.

846

@var return_message: Any message that should be displayed to explain

847

the return code.

848

@var is_aborted: Whether the suite was aborted or not.

849

True, False or None (aborting status is unknown yet)

850

@var timings: A Timing object that records the suite's timings.

"""

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

855

def __init__(self, instance_server, afe, tko, build,

856

suite_name, suite_job_id):

857

self._instance_server = instance_server

self._afe = afe

self._tko = tko

self._build = build

self._suite_name = suite_name

862

self._suite_job_id = suite_job_id

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

863

self._suite_views = []

864

self._child_views = []

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

865

self._test_views = []

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

866

self._retry_counts = {}

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

867

self._web_links = []

868

self._buildbot_links = []

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

869

self._max_testname_width = 0

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

870

self.return_code = None

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

871

self.return_message = ''

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

872

self.is_aborted = None

self.timings = None

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

876

def _fetch_relevant_test_views_of_suite(self):

877

"""Fetch relevant test views of the suite job.

878

879

For the suite job, there will be a test view for SERVER_JOB, and views

880

for results of its child jobs. For example, assume we've ceated

881

a suite job (afe_job_id: 40) that runs dummy_Pass, dummy_Fail,

882

dummy_Pass.bluetooth. Assume dummy_Pass was aborted before running while

883

dummy_Path.bluetooth got TEST_NA as no duts have bluetooth.

884

So the suite job's test views would look like

885

_____________________________________________________________________

886

887

10 | 1000 |SERVER_JOB |---- |40 |GOOD

888

11 | 1000 |dummy_Pass |NULL |40 |ABORT

889

12 | 1000 |dummy_Fail.Fail |41-onwer/...|40 |FAIL

890

13 | 1000 |dummy_Fail.Error |42-owner/...|40 |ERROR

891

14 | 1000 |dummy_Pass.bluetooth|NULL |40 |TEST_NA

892

893

For a suite job, we only care about

894

a) The test view for the suite job's SERVER_JOB

895

b) The test views for real tests without a subdir. A NULL subdir

896

indicates that a test didn't get executed.

897

So, for the above example, we only keep test views whose test_idxs

898

are 10, 11, 14.

899

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

900

@returns: A list of TestView objects, representing relevant

901

test views of the suite job.

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

902

903

"""

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

904

views = self._tko.run(call='get_detailed_test_views',

905

afe_job_id=self._suite_job_id)

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

906

relevant_views = []

907

for v in views:

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

908

v = TestView(v, self._suite_job_id, self._suite_name, self._build)

909

if v.is_relevant_suite_view():

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

910

relevant_views.append(v)

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

911

if v.is_timeout_and_not_run():

912

v['reason'] = 'Timed out, did not run.'

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

913

return relevant_views

914

915

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

916

def _compute_retry_count(self, view):

917

"""Return how many times the test has been retried.

918

919

@param view: A TestView instance.

920

@returns: An int value indicating the retry count.

921

922

"""

923

old_job = view['job_keyvals'].get('retry_original_job_id')

count = 0

while old_job:

count += 1

views = self._tko.run(

928

call='get_detailed_test_views', afe_job_id=old_job)

929

old_job = (views[0]['job_keyvals'].get('retry_original_job_id')

if views else None)

return count

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

934

def _fetch_test_views_of_child_jobs(self):

935

"""Fetch test views of child jobs.

936

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

937

@returns: A tuple (child_views, retry_counts)

938

child_views is list of TestView objects, representing

939

all valid views. retry_counts is a dictionary that maps

940

test_idx to retry counts. It only stores retry

941

counts that are greater than 0.

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

942

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

943

"""

944

child_job_ids = set(job.id for job in

945

self._afe.get_jobs(

946

parent_job_id=self._suite_job_id))

947

child_views = []

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

948

retry_counts = {}

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

949

for job_id in child_job_ids:

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

950

views = [TestView(v, self._suite_job_id,

951

self._suite_name, self._build)

952

for v in self._tko.run(

953

call='get_detailed_test_views', afe_job_id=job_id,

954

invalid=0)]

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

955

contains_test_failure = any(

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

956

v.is_test() and v['status'] != 'GOOD' for v in views)

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

957

for v in views:

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

958

if (v.is_test() or

959

v['status'] != 'GOOD' and not contains_test_failure):

960

# For normal test view, just keep it.

961

# For SERVER_JOB or CLIENT_JOB, only keep it

962

# if it fails and no other test failure.

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

963

child_views.append(v)

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

964

retry_count = self._compute_retry_count(v)

965

if retry_count > 0:

966

retry_counts[v['test_idx']] = retry_count

967

return child_views, retry_counts

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

968

969

970

def _generate_web_and_buildbot_links(self):

971

"""Generate web links and buildbot links."""

972

# TODO(fdeng): If a job was aborted before it reaches Running

973

# state, we read the test view from the suite job

974

# and thus this method generates a link pointing to the

975

# suite job's page for the aborted job. Need a fix.

976

self._web_links = []

977

self._buildbot_links = []

978

# Bug info are stored in the suite job's keyvals.

979

suite_job_keyvals = self._suite_views[0]['job_keyvals']

980

for v in self._test_views:

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

981

retry_count = self._retry_counts.get(v['test_idx'], 0)

982

bug_info = v.get_bug_info(suite_job_keyvals)

983

job_id_owner = v.get_job_id_owner_str()

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

984

link = LogLink(

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

985

anchor=v.get_testname().ljust(

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

986

self._max_testname_width),

987

server=self._instance_server,

988

job_string=job_id_owner,

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

989

bug_info=bug_info, retry_count=retry_count)

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

990

self._web_links.append(link)

991

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

992

if v.should_display_buildbot_link():

993

link.reason = v.get_buildbot_link_reason()

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

994

self._buildbot_links.append(link)

995

996

997

def _record_timings(self):

998

"""Record suite timings."""

999

self.timings = Timings(self._suite_job_id)

1000

for v in self._test_views:

1001

self.timings.RecordTiming(v)

1002

1003

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

1004

def _get_return_msg(self, code, tests_passed_after_retry):

1005

"""Return the proper message for a given return code.

1006

1007

@param code: An enum value of RETURN_CODES

1008

@param test_passed_after_retry: True/False, indicating

1009

whether there are test(s) that have passed after retry.

1010

1011

@returns: A string, representing the message.

1012

1013

"""

1014

if code == RETURN_CODES.INFRA_FAILURE:

1015

return 'Suite job failed'

1016

elif code == RETURN_CODES.SUITE_TIMEOUT:

1017

return ('Some test(s) was aborted before running,'

1018

' suite must have timed out.')

1019

elif code == RETURN_CODES.WARNING:

1020

if tests_passed_after_retry:

1021

return 'Some test(s) passed after retry.'

1022

else:

1023

return 'Some test(s) raised a warning.'

1024

elif code == RETURN_CODES.ERROR:

1025

return 'Some test(s) failed.'

else:

return ''

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1030

def _compute_return_code(self):

1031

"""Compute the exit code based on test results."""

1032

code = RETURN_CODES.OK

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

1033

message = ''

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

1034

tests_passed_after_retry = False

1035

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1036

for v in self._test_views:

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

1037

if v.is_experimental():

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

1038

continue

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

1039

if (v.get_testname() == TestView.SUITE_PREP

1040

and v.is_in_fail_status()):

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

1041

# Suite job failed (with status FAIL/ERROR/ABORT).

1042

# If the suite job was aborted, it may or may not be

1043

# caused by timing out. Here we only categorize it as

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

1044

# INFRA_FAILURE which is less severe than SUITE_TIMEOUT.

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

1045

# Suite timing out will be diagnosed later using

1046

# other methods which will upgrade the severity accordingly.

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

1047

code = get_worse_code(code, RETURN_CODES.INFRA_FAILURE)

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

1048

elif v.is_timeout_and_not_run():

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

1049

code = get_worse_code(code, RETURN_CODES.SUITE_TIMEOUT)

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

1050

elif v['status'] == 'WARN':

1051

code = get_worse_code(code, RETURN_CODES.WARNING)

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

1052

elif v.is_in_fail_status():

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

1053

code = get_worse_code(code, RETURN_CODES.ERROR)

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

1054

elif v.is_retry():

1055

code = get_worse_code(code, RETURN_CODES.WARNING)

1056

tests_passed_after_retry = True

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

1057

else:

1058

code = get_worse_code(code, RETURN_CODES.OK)

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1059

self.return_code = code

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

1060

self.return_message = self._get_return_msg(

1061

code, tests_passed_after_retry)

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1062

1063

1064

def output_results(self):

1065

"""Output test results, timings and web links."""

1066

# Output test results

1067

for v in self._test_views:

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

1068

display_name = v.get_testname().ljust(self._max_testname_width)

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1069

logging.info('%s%s', display_name,

1070

get_pretty_status(v['status']))

1071

if v['status'] != 'GOOD':

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

1072

logging.info('%s %s: %s', display_name, v['status'],

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1073

v['reason'])

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

1074

if v.is_retry():

1075

retry_count = self._retry_counts.get(v['test_idx'], 0)

1076

logging.info('%s retry_count: %s',

1077

display_name, retry_count)

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1078

# Output suite timings

1079

logging.info(self.timings)

1080

# Output links to test logs

1081

logging.info('\nLinks to test logs:')

1082

for link in self._web_links:

1083

logging.info(link.GenerateTextLink())

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

1084

logging.info('\n')

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1085

1086

1087

def output_buildbot_links(self):

1088

"""Output buildbot links."""

1089

for link in self._buildbot_links:

1090

logging.info(link.GenerateBuildbotLink())

def run(self):

"""Collect test results.

1095

1096

This method goes through the following steps:

1097

Fetch relevent test views of the suite job.

1098

Fetch test views of child jobs

1099

Check whether the suite was aborted.

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

1100

Generate links.

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1101

Calculate suite timings.

1102

Compute return code based on the test result.

1103

1104

"""

1105

self._suite_views = self._fetch_relevant_test_views_of_suite()

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

1106

self._child_views, self._retry_counts = (

1107

self._fetch_test_views_of_child_jobs())

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1108

self._test_views = self._suite_views + self._child_views

1109

# For hostless job in Starting status, there is no test view associated.

1110

# This can happen when a suite job in Starting status is aborted. When

1111

# the scheduler hits some limit, e.g., max_hostless_jobs_per_drone,

1112

# max_jobs_started_per_cycle, a suite job can stays in Starting status.

1113

if not self._test_views:

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

1114

self.return_code = RETURN_CODES.INFRA_FAILURE

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1115

self.return_message = 'No test view was found.'

1116

return

1117

self.is_aborted = any([view['job_keyvals'].get('aborted_by')

1118

for view in self._suite_views])

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

1119

self._max_testname_width = max(

1120

[len(v.get_testname()) for v in self._test_views]) + 3

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1121

self._generate_web_and_buildbot_links()

1122

self._record_timings()

1123

self._compute_return_code()

1124

1125

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

1126

def main():

Aviv Keshet

2013-03-21 16:38:31 -0700

[diff] [blame]

1127

"""

1128

Entry point for run_suite script.

1129

"""

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

1130

parser, options, args = parse_options()

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1131

if not verify_options_and_args(options, args):

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

parser.print_help()

return

Chris Masone

2012-07-11 11:11:18 -0700

[diff] [blame]

1135

log_name = 'run_suite-default.log'

Chris Masone

986459e

2012-04-11 11:36:48 -0700

[diff] [blame]

1136

if not options.mock_job_id:

Chris Masone

3a85064

2012-07-11 11:11:18 -0700

[diff] [blame]

1137

# convert build name from containing / to containing only _

1138

log_name = 'run_suite-%s.log' % options.build.replace('/', '_')

1139

log_dir = os.path.join(common.autotest_dir, 'logs')

1140

if os.path.exists(log_dir):

1141

log_name = os.path.join(log_dir, log_name)

Alex Miller

88762a8

2013-09-04 15:41:28 -0700

[diff] [blame]

1142

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1143

setup_logging(logfile=log_name)

Alex Miller

88762a8

2013-09-04 15:41:28 -0700

[diff] [blame]

1144

try:

1145

priority = int(options.priority)

1146

except ValueError:

1147

try:

1148

priority = priorities.Priority.get_value(options.priority)

1149

except AttributeError:

1150

print 'Unknown priority level %s. Try one of %s.' % (

1151

options.priority, ', '.join(priorities.Priority.names))

1152

Simran Basi

2012-12-07 16:37:09 -0800

[diff] [blame]

1153

try:

Dan Shi

a02181f

2013-01-29 14:03:32 -0800

[diff] [blame]

1154

if not options.bypass_labstatus:

J. Richard Barnette

abbe096

2013-12-10 18:15:44 -0800

[diff] [blame]

1155

utils.check_lab_status(options.build)

1156

except utils.TestLabException as e:

Alex Miller

c3262f0

2013-11-15 11:27:19 -0800

[diff] [blame]

1157

logging.warning('Error Message: %s', e)

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

1158

return RETURN_CODES.INFRA_FAILURE

Simran Basi

2012-12-07 16:37:09 -0800

[diff] [blame]

1159

Alex Miller

2013-10-30 15:18:57 -0700

[diff] [blame]

1160

instance_server = instance_for_pool(options.pool)

1161

afe = frontend_wrappers.RetryingAFE(server=instance_server,

Simran Basi

25effe3

2013-11-26 13:02:11 -0800

[diff] [blame]

1162

timeout_min=options.afe_timeout_mins,

Chris Masone

8ac6671

2012-02-15 14:21:02 -0800

[diff] [blame]

1163

delay_sec=options.delay_sec)

Alex Miller

2013-10-30 15:18:57 -0700

[diff] [blame]

1164

logging.info('Autotest instance: %s', instance_server)

Chris Masone

359c0fd

2012-03-13 15:18:59 -0700

[diff] [blame]

1165

Dan Shi

20952c1

2014-05-14 17:07:38 -0700

[diff] [blame]

1166

rpc_helper = diagnosis_utils.RPCHelper(afe)

1167

1168

try:

Dan Shi

94234cb

2014-05-23 20:04:31 -0700

[diff] [blame]

1169

rpc_helper.check_dut_availability(options.board, options.pool,

Dan Shi

8de6d1b

2014-06-12 09:10:37 -0700

[diff] [blame]

1170

options.minimum_duts)

Dan Shi

20952c1

2014-05-14 17:07:38 -0700

[diff] [blame]

1171

except utils.TestLabException as e:

1172

logging.warning('Not enough DUTs to run this suite: %s', e)

1173

return RETURN_CODES.INFRA_FAILURE

1174

Dan Shi

9512241

2013-11-12 16:20:33 -0800

[diff] [blame]

1175

wait = options.no_wait == 'False'

1176

file_bugs = options.file_bugs == 'True'

Fang Deng

2014-05-15 15:41:50 -0700

[diff] [blame]

1177

retry = options.retry == 'True'

Prashanth B

2014-03-14 12:36:29 -0700

[diff] [blame]

1178

logging.info('%s Submitted create_suite_job rpc',

1179

diagnosis_utils.JobTimer.format_time(datetime.now()))

Chris Masone

986459e

2012-04-11 11:36:48 -0700

[diff] [blame]

1180

if options.mock_job_id:

1181

job_id = int(options.mock_job_id)

1182

else:

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

1183

try:

1184

job_id = afe.run('create_suite_job', name=options.name,

1185

board=options.board, build=options.build,

1186

check_hosts=wait, pool=options.pool,

1187

num=options.num,

1188

file_bugs=file_bugs, priority=priority,

1189

suite_args=options.suite_args,

1190

wait_for_results=wait,

Fang Deng

2014-05-15 15:41:50 -0700

[diff] [blame]

1191

timeout_mins=options.timeout_mins,

1192

job_retry=retry)

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

1193

except (error.CrosDynamicSuiteException,

1194

error.RPCException, proxy.JSONRPCException) as e:

1195

logging.warning('Error Message: %s', e)

1196

return RETURN_CODES.INFRA_FAILURE

1197

Prashanth B

2014-03-14 12:36:29 -0700

[diff] [blame]

1198

job_timer = diagnosis_utils.JobTimer(

1199

time.time(), float(options.timeout_mins))

1200

logging.info('%s Created suite job: %s',

1201

job_timer.format_time(job_timer.job_created_time),

1202

reporting_utils.link_job(

1203

job_id, instance_server=instance_server))

1204

Alex Miller

2013-10-30 15:18:57 -0700

[diff] [blame]

1205

TKO = frontend_wrappers.RetryingTKO(server=instance_server,

Simran Basi

25effe3

2013-11-26 13:02:11 -0800

[diff] [blame]

1206

timeout_min=options.afe_timeout_mins,

Chris Masone

8ac6671

2012-02-15 14:21:02 -0800

[diff] [blame]

1207

delay_sec=options.delay_sec)

Aviv Keshet

2013-03-21 16:38:31 -0700

[diff] [blame]

1208

code = RETURN_CODES.OK

Dan Shi

20952c1

2014-05-14 17:07:38 -0700

[diff] [blame]

1209

J. Richard Barnette

712eb40

2013-08-13 18:03:00 -0700

[diff] [blame]

1210

if wait:

1211

while not afe.get_jobs(id=job_id, finished=True):

Prashanth B

2014-03-14 12:36:29 -0700

[diff] [blame]

1212

# Note that this call logs output, preventing buildbot's

1213

# 9000 second silent timeout from kicking in. Let there be no

1214

# doubt, this is a hack. The timeout is from upstream buildbot and

1215

# this is the easiest work around.

1216

if job_timer.first_past_halftime():

1217

rpc_helper.diagnose_job(job_id)

Prashanth B

a7be207

2014-07-15 15:03:21 -0700

[diff] [blame^]

1218

if job_timer.debug_output_timer.poll():

1219

logging.info('The suite job has another %s till timeout.',

Prashanth B

2014-03-14 12:36:29 -0700

[diff] [blame]

1220

job_timer.timeout_hours - job_timer.elapsed_time())

Alex Miller

764227d

2013-11-15 10:28:56 -0800

[diff] [blame]

1221

time.sleep(10)

J. Richard Barnette

712eb40

2013-08-13 18:03:00 -0700

[diff] [blame]

1222

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1223

# Start collecting test results.

1224

collector = ResultCollector(instance_server=instance_server,

1225

afe=afe, tko=TKO, build=options.build,

1226

suite_name=options.name,

1227

suite_job_id=job_id)

1228

collector.run()

1229

# Output test results, timings, web links.

1230

collector.output_results()

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1231

code = collector.return_code

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

1232

return_message = collector.return_message

1233

if not options.mock_job_id:

1234

# Send timings to statsd. Do not record stats if the suite was

1235

# aborted (either by a user or through the golo rpc).

1236

# Also do not record stats if is_aborted is None, indicating

1237

# aborting status is unknown yet.

1238

if collector.is_aborted == False:

1239

collector.timings.SendResultsToStatsd(

1240

options.name, options.build, options.board)

1241

# There is a minor race condition here where we might have aborted

1242

# for some reason other than a timeout, and the job_timer thinks

1243

# it's a timeout because of the jitter in waiting for results.

1244

# The consequence would be that run_suite exits with code

1245

# SUITE_TIMEOUT while it should have returned INFRA_FAILURE

1246

# instead, which should happen very rarely.

1247

is_suite_timeout = job_timer.is_suite_timeout()

1248

if collector.is_aborted == True and is_suite_timeout:

1249

# There are two possible cases when a suite times out.

1250

# 1. the suite job was aborted due to timing out

1251

# 2. the suite job succeeded, but some child jobs

1252

# were already aborted before the suite job exited.

1253

# The case 2 was handled by ResultCollector,

1254

# here we handle case 1.

1255

old_code = code

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

1256

code = get_worse_code(

1257

code, RETURN_CODES.SUITE_TIMEOUT)

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

1258

if old_code != code:

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

1259

return_message = 'Suite job timed out.'

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

1260

logging.info('Upgrade return code from %s to %s '

1261

'because suite job has timed out.',

1262

RETURN_CODES.get_string(old_code),

1263

RETURN_CODES.get_string(code))

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

1264

if is_suite_timeout:

1265

logging.info('\nAttempting to diagnose pool: %s', options.pool)

1266

stats.Counter('run_suite_timeouts').increment()

1267

try:

1268

# Add some jitter to make up for any latency in

1269

# aborting the suite or checking for results.

1270

cutoff = (job_timer.timeout_hours +

1271

datetime_base.timedelta(hours=0.3))

1272

rpc_helper.diagnose_pool(

1273

options.board, options.pool, cutoff)

1274

except proxy.JSONRPCException as e:

1275

logging.warning('Unable to diagnose suite abort.')

1276

1277

# And output return message.

1278

code_str = RETURN_CODES.get_string(code)

1279

logging.info('Will return from run_suite with status: %s', code_str)

1280

if return_message:

1281

logging.info('Reason: %s', return_message)

Prashanth B

2014-03-14 12:36:29 -0700

[diff] [blame]

1282

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1283

logging.info('\nOutput below this line is for buildbot consumption:')

1284

collector.output_buildbot_links()

Chris Masone

d5939fe

2012-03-13 10:11:06 -0700

[diff] [blame]

1285

else:

Scott Zawalski

2012-07-02 18:45:07 -0400

[diff] [blame]

1286

logging.info('Created suite job: %r', job_id)

Alex Miller

a05498f

2013-11-01 16:16:21 -0700

[diff] [blame]

1287

link = LogLink(options.name, instance_server,

1288

'%s-%s' % (job_id, getpass.getuser()))

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

1289

logging.info(link.GenerateBuildbotLink())

Scott Zawalski

2012-07-02 18:45:07 -0400

[diff] [blame]

1290

logging.info('--no_wait specified; Exiting.')

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

1291

return code

1292

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1293

Chris Masone