Blame - site_utils/run_suite.py - platform/external/autotest

2012-02-14 14:18:01 -0800

[diff] [blame]

#!/usr/bin/python

#

# Use of this source code is governed by a BSD-style license that can be

5

# found in the LICENSE file.

6

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

7

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

8

"""Tool for running suites of tests and waiting for completion.

9

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

10

The desired test suite will be scheduled with autotest. By default,

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

11

this tool will block until the job is complete, printing a summary

12

at the end. Error conditions result in exceptions.

13

14

This is intended for use only with Chrome OS test suites that leverage the

15

dynamic suite infrastructure in server/cros/dynamic_suite.py.

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

16

17

This script exits with one of the following codes:

18

0 - OK: Suite finished successfully

19

1 - ERROR: Test(s) failed, or hits its own timeout

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

20

2 - WARNING: Test(s) raised a warning or passed on retry, none failed/timed out.

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

21

3 - INFRA_FAILURE: Infrastructure related issues, e.g.

22

* Lab is down

23

* Too many duts (defined as a constant) in repair failed status

24

* Suite job issues, like bug in dynamic suite,

25

user aborted the suite, lose a drone/all devservers/rpc server,

26

0 tests ran, etc.

Fang Deng

2014-09-12 14:16:11 -0700

[diff] [blame]

27

* provision failed

28

TODO(fdeng): crbug.com/413918, reexamine treating all provision

29

failures as INFRA failures.

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

30

4 - SUITE_TIMEOUT: Suite timed out, some tests ran,

31

none failed by the time the suite job was aborted. This will cover,

32

but is not limited to, the following cases:

33

* A devserver failure that manifests as a timeout

34

* No DUTs available midway through a suite

35

* Provision/Reset/Cleanup took longer time than expected for new image

36

* A regression in scheduler tick time.

Fang Deng

2014-09-25 10:18:48 -0700

[diff] [blame]

37

5 - BOARD_NOT_AVAILABLE: If there is no host for the requested board/pool.

38

6 - INVALID_OPTIONS: If options are not valid.

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

39

"""

40

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

41

Prashanth B

2014-03-14 12:36:29 -0700

[diff] [blame]

42

import datetime as datetime_base

Shuqian Zhao

2015-09-29 14:19:28 -0700

[diff] [blame]

43

import ast, getpass, json, logging, optparse, os, re, sys, time

Chris Masone

2012-09-06 16:00:07 -0700

[diff] [blame]

44

from datetime import datetime

45

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

46

import common

Shuqian Zhao

2015-08-05 22:56:30 -0700

[diff] [blame]

47

from autotest_lib.client.common_lib import control_data

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

48

from autotest_lib.client.common_lib import error

J. Richard Barnette

3cbd76b

2013-11-27 12:11:25 -0800

[diff] [blame]

49

from autotest_lib.client.common_lib import global_config, enum

50

from autotest_lib.client.common_lib import priorities

Dan Shi

2014-08-10 23:38:40 -0700

[diff] [blame]

51

from autotest_lib.client.common_lib import time_utils

Gabe Black

2015-02-04 23:55:15 -0800

[diff] [blame]

52

from autotest_lib.client.common_lib.cros.graphite import autotest_stats

Prashanth B

2014-05-08 18:01:27 -0700

[diff] [blame]

53

from autotest_lib.client.common_lib.cros import retry

Prashanth B

2014-03-14 12:36:29 -0700

[diff] [blame]

54

from autotest_lib.frontend.afe.json_rpc import proxy

J. Richard Barnette

3cbd76b

2013-11-27 12:11:25 -0800

[diff] [blame]

55

from autotest_lib.server import utils

Dan Shi

2014-10-10 13:38:51 -0700

[diff] [blame]

56

from autotest_lib.server.cros import provision

Chris Masone

44e4d6c

2012-08-15 14:25:53 -0700

[diff] [blame]

57

from autotest_lib.server.cros.dynamic_suite import constants

Chris Masone

b493555

2012-08-14 12:05:54 -0700

[diff] [blame]

58

from autotest_lib.server.cros.dynamic_suite import frontend_wrappers

Prashanth B

2014-03-14 12:36:29 -0700

[diff] [blame]

59

from autotest_lib.server.cros.dynamic_suite import reporting_utils

J. Richard Barnette

e7b98bb

2013-08-21 16:34:16 -0700

[diff] [blame]

60

from autotest_lib.server.cros.dynamic_suite import tools

Prashanth B

2014-03-14 12:36:29 -0700

[diff] [blame]

61

from autotest_lib.site_utils import diagnosis_utils

MK Ryu

2014-10-21 11:58:09 -0700

[diff] [blame]

62

from autotest_lib.site_utils import job_overhead

63

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

64

Chris Masone

1120cdf

2012-02-27 17:35:07 -0800

[diff] [blame]

65

CONFIG = global_config.global_config

66

Simran Basi

2015-02-03 15:50:18 -0800

[diff] [blame]

67

WMATRIX_RETRY_URL = CONFIG.get_config_value('BUG_REPORTING',

68

'wmatrix_retry_url')

69

Simran Basi

22aa9fe

2012-12-07 16:37:09 -0800

[diff] [blame]

70

# Return code that will be sent back to autotest_rpc_server.py

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

71

RETURN_CODES = enum.Enum(

Fang Deng

2014-09-18 17:52:06 -0700

[diff] [blame]

72

'OK', 'ERROR', 'WARNING', 'INFRA_FAILURE', 'SUITE_TIMEOUT',

Fang Deng

2014-09-25 10:18:48 -0700

[diff] [blame]

73

'BOARD_NOT_AVAILABLE', 'INVALID_OPTIONS')

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

74

# The severity of return code. If multiple codes

75

# apply, the script should always return the severest one.

76

# E.g. if we have a test failure and the suite also timed out,

77

# we should return 'ERROR'.

78

SEVERITY = {RETURN_CODES.OK: 0,

79

RETURN_CODES.WARNING: 1,

Fang Deng

2014-09-12 14:16:11 -0700

[diff] [blame]

80

RETURN_CODES.SUITE_TIMEOUT: 2,

81

RETURN_CODES.INFRA_FAILURE: 3,

Fang Deng

2014-09-25 10:18:48 -0700

[diff] [blame]

82

RETURN_CODES.ERROR: 4}

Simran Basi

bf6ebc9

2016-05-27 15:35:05 -0700

[diff] [blame]

83

ANDROID_BUILD_REGEX = r'.+/.+/P?([0-9]+|LATEST)'

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

84

85

86

def get_worse_code(code1, code2):
    """Pick whichever of two return codes ranks higher in SEVERITY.

    When both codes have the same severity, code1 is returned.

    @param code1: An enum value of RETURN_CODES
    @param code2: An enum value of RETURN_CODES

    @returns: code2 if it is strictly more severe than code1, else code1.

    """
    # Both lookups index SEVERITY directly, so a code that has no entry in
    # that table raises KeyError.
    first_rank = SEVERITY[code1]
    second_rank = SEVERITY[code2]
    if second_rank > first_rank:
        return code2
    return code1

Simran Basi

22aa9fe

2012-12-07 16:37:09 -0800

[diff] [blame]

96

Chris Masone

dfa0beba

2012-03-19 11:41:47 -0700

[diff] [blame]

97

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

98

def parse_options():
    """Build the run_suite option parser and parse the command line.

    Several logically-boolean options (--no_wait, --file_bugs, --retry,
    --offload_failures_only) are plain string options whose value must be
    the text "True" or "False"; see the inline comments for why.

    @returns: A tuple (parser, options, args) where parser is the
              optparse.OptionParser and options/args are the results of
              parser.parse_args().

    """
    #pylint: disable-msg=C0111
    usage = "usage: %prog [options]"
    parser = optparse.OptionParser(usage=usage)
    parser.add_option("-b", "--board", dest="board")
    parser.add_option("-i", "--build", dest="build")
    parser.add_option("-w", "--web", dest="web", default=None,
                      help="Address of a webserver to receive suite requests.")
    parser.add_option('--firmware_rw_build', dest='firmware_rw_build',
                      default=None,
                      help='Firmware build to be installed in dut RW firmware.')
    parser.add_option('--firmware_ro_build', dest='firmware_ro_build',
                      default=None,
                      help='Firmware build to be installed in dut RO firmware.')
    parser.add_option('--test_source_build', dest='test_source_build',
                      default=None,
                      help=('Build that contains the test code, '
                            'e.g., it can be the value of `--build`, '
                            '`--firmware_rw_build` or `--firmware_ro_build` '
                            'arguments. Default is None, that is, use the test '
                            'code from `--build` (CrOS image)'))
    # This should just be a boolean flag, but the autotest "proxy" code
    # can't handle flags that don't take arguments.
    parser.add_option("-n", "--no_wait", dest="no_wait", default="False",
                      help='Must pass "True" or "False" if used.')
    # If you really want no pool, --pool="" will do it. USE WITH CARE.
    parser.add_option("-p", "--pool", dest="pool", default="suites")
    parser.add_option("-s", "--suite_name", dest="name")
    parser.add_option("-a", "--afe_timeout_mins", type="int",
                      dest="afe_timeout_mins", default=30)
    parser.add_option("-t", "--timeout_mins", type="int",
                      dest="timeout_mins", default=1440)
    parser.add_option("-x", "--max_runtime_mins", type="int",
                      dest="max_runtime_mins", default=1440)
    parser.add_option("-d", "--delay_sec", type="int",
                      dest="delay_sec", default=10)
    parser.add_option("-m", "--mock_job_id", dest="mock_job_id",
                      help="Attach to existing job id for already running "
                           "suite, and creates report.")
    # NOTE(akeshet): This looks similar to --no_wait, but behaves differently.
    # --no_wait is passed in to the suite rpc itself and affects the suite,
    # while this does not.
    parser.add_option("-c", "--create_and_return", dest="create_and_return",
                      action="store_true",
                      help="Create the suite and print the job id, then "
                           "finish immediately.")
    parser.add_option("-u", "--num", dest="num", type="int", default=None,
                      help="Run on at most NUM machines.")
    # Same boolean flag issue applies here.
    parser.add_option("-f", "--file_bugs", dest="file_bugs", default='False',
                      help='File bugs on test failures. Must pass "True" or '
                           '"False" if used.')
    parser.add_option("-l", "--bypass_labstatus", dest="bypass_labstatus",
                      action="store_true", help='Bypass lab status check.')
    # We allow either a number or a string for the priority. This way, if you
    # know what you're doing, one can specify a custom priority level between
    # other levels.
    parser.add_option("-r", "--priority", dest="priority",
                      default=priorities.Priority.DEFAULT,
                      action="store",
                      help="Priority of suite. Either numerical value, or "
                           "one of (" + ", ".join(priorities.Priority.names)
                           + ").")
    parser.add_option('--retry', dest='retry', default='False',
                      action='store', help='Enable test retry. '
                      'Must pass "True" or "False" if used.')
    # The first literal needs its trailing space: the two pieces are joined
    # by implicit string concatenation.
    parser.add_option('--max_retries', dest='max_retries', default=None,
                      type='int', action='store', help='Maximum retries '
                      'allowed at suite level. No limit if not specified.')
    parser.add_option('--minimum_duts', dest='minimum_duts', type=int,
                      default=0, action='store',
                      help='Check that the pool has at least such many '
                           'healthy machines, otherwise suite will not run. '
                           'Default to 0.')
    parser.add_option('--suite_min_duts', dest='suite_min_duts', type=int,
                      default=0, action='store',
                      help='Preferred minimum number of machines. Scheduler '
                           'will prioritize on getting such many machines for '
                           'the suite when it is competing with another suite '
                           'that has a higher priority but already got minimum '
                           'machines it needs. Default to 0.')
    parser.add_option("--suite_args", dest="suite_args",
                      default=None, action="store",
                      help="Argument string for suite control file.")
    parser.add_option('--offload_failures_only', dest='offload_failures_only',
                      action='store', default='False',
                      help='Only enable gs_offloading for failed tests. '
                           'Successful tests will be deleted. Must pass "True"'
                           ' or "False" if used.')
    parser.add_option('--use_suite_attr', dest='use_suite_attr',
                      action='store_true', default=False,
                      help='Advanced. Run the suite based on ATTRIBUTES of '
                           'control files, rather than SUITE.')
    parser.add_option('--json_dump', dest='json_dump', action='store_true',
                      default=False,
                      help='Dump the output of run_suite to stdout.')
    parser.add_option('--run_prod_code', dest='run_prod_code',
                      action='store_true', default=False,
                      help='Run the test code that lives in prod aka the test '
                           'code currently on the lab servers.')
    parser.add_option('--delay_minutes', type=int, default=0,
                      help=('Delay the creation of test jobs for a given '
                            'number of minutes. This argument can be used to '
                            'force provision jobs being delayed, which helps '
                            'to distribute loads across devservers.'))
    parser.add_option('--skip_duts_check', dest='skip_duts_check',
                      action='store_true', default=False,
                      help='If True, skip minimum available DUTs check')
    options, args = parser.parse_args()
    return parser, options, args

208

209

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

210

def verify_options_and_args(options, args):
    """Verify the validity of options and args.

    Checks run in order and stop at the first failure, printing a message
    that describes it. When every check passes, options.test_source_build
    is defaulted to options.build if no test source build was given.

    @param options: The parsed options to verify.
    @param args: The parsed args to verify.

    @returns: True if verification passes, False otherwise.

    """
    if args:
        print('Unknown arguments: ' + str(args))
        return False

    if options.mock_job_id and (
            not options.build or not options.name or not options.board):
        print('When using -m, need to specify build, board and suite '
              'name which you have used for creating the original job')
        return False

    # build, board and suite name are mandatory in every mode.
    if not options.build:
        print('Need to specify which build to use')
        return False
    if not options.board:
        print('Need to specify board')
        return False
    if not options.name:
        print('Need to specify suite name')
        return False

    if options.num is not None and options.num < 1:
        print('Number of machines must be more than 0, if specified.')
        return False

    # These options are strings rather than real flags, so only the exact
    # spellings "True" and "False" are accepted.
    if options.no_wait != 'True' and options.no_wait != 'False':
        print('Please specify "True" or "False" for --no_wait.')
        return False
    if options.file_bugs != 'True' and options.file_bugs != 'False':
        print('Please specify "True" or "False" for --file_bugs.')
        return False
    if options.retry != 'True' and options.retry != 'False':
        print('Please specify "True" or "False" for --retry')
        return False

    if options.retry == 'False' and options.max_retries is not None:
        print('max_retries can only be used with --retry=True')
        return False
    if options.use_suite_attr and options.suite_args is not None:
        print('The new suite control file cannot parse the suite_args: %s. '
              'Please do not specify any suite_args here.'
              % options.suite_args)
        return False
    if options.no_wait == 'True' and options.retry == 'True':
        # NOTE(review): this combination only produces a warning; the options
        # are still accepted. Confirm whether it should be rejected instead.
        print('Test retry is not available when using --no_wait=True')

    # Default to use the test code in CrOS build.
    if not options.test_source_build and options.build:
        options.test_source_build = options.build
    return True

Shuqian Zhao

2015-06-02 11:12:28 -0700

[diff] [blame]

265

def change_options_for_suite_attr(options):
    """Change options to be prepared to run the suite_attr_wrapper.

    If 'use_suite_attr' is specified on the cmd line, it indicates to run the
    new style suite control file, suite_attr_wrapper. This rewrites
    options.name to 'suite_attr_wrapper' and replaces options.suite_args
    with the string form of a dict whose 'attr_filter' entry selects the
    originally requested suite(s).

    @param options: The verified options. Modified in place.

    @returns: The changed options (the same object that was passed in).

    """
    # Convert the suite_name to attribute boolean expression. A single name
    # becomes one term; any other iterable of names is OR-ed together.
    if isinstance(options.name, str):
        attr_filter_val = 'suite:%s' % options.name
    else:
        attr_filter_val = ' or '.join('suite:%s' % x for x in options.name)

    # suite_attr_wrapper receives its arguments as the str() of a dict.
    options.suite_args = str({'attr_filter': attr_filter_val})
    options.name = 'suite_attr_wrapper'
    return options

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

294

def get_pretty_status(status):
    """
    Map a test status onto the bracketed tag used when printing results.

    @param status: Status to convert.

    @return: The tag for the status:
                 GOOD    -> [ PASSED ]
                 TEST_NA -> [ INFO ]
                 other   -> [ FAILED ]
    """
    # Checked in order; anything that matches neither entry is a failure.
    known_tags = (('GOOD', '[ PASSED ]'), ('TEST_NA', '[ INFO ]'))
    for known_status, tag in known_tags:
        if status == known_status:
            return tag
    return '[ FAILED ]'

310

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

311

Shuqian Zhao

2015-09-29 14:19:28 -0700

[diff] [blame]

312

def get_original_suite_name(suite_name, suite_args):
    """Recover the suite that suite_attr_wrapper is running on behalf of.

    @param suite_name: the name of the suite launched in afe. Only the
                       value 'suite_attr_wrapper' triggers a lookup in
                       suite_args; any other name is returned unchanged.
    @param suite_args: string form of a dict literal; its 'attr_filter'
                       entry is searched for 'suite:<name>' terms. Only
                       read when suite_name is 'suite_attr_wrapper'.

    @returns: the name from the first 'suite:' term of the attribute
              filter, or suite_name when there is no such term.

    """
    if suite_name != 'suite_attr_wrapper':
        return suite_name

    attr_filter = ast.literal_eval(suite_args).get('attr_filter', '')
    # Break the expression apart on parentheses and spaces, then take the
    # first piece that names a suite.
    for term in re.split('[() ]', attr_filter):
        if term and term.startswith('suite:'):
            return term[6:]
    return suite_name

Aviv Keshet

2014-10-09 16:33:09 -0700

[diff] [blame]

331

def GetBuildbotStepLink(anchor_text, url):
    """Build a buildbot STEP_LINK annotation for a url.

    '@' separates the fields of the annotation, so every '@' in the anchor
    text is rewritten to '-AT-' first. The url is inserted unchanged.

    @param anchor_text The link text.
    @param url The url to link to.

    @return The annotation string '@@@STEP_LINK@<text>@<url>@@@'.
    """
    return '@@@STEP_LINK@%s@%s@@@' % (anchor_text.replace('@', '-AT-'), url)

Aviv Keshet

9afee5e

2014-10-09 16:33:09 -0700

[diff] [blame]

339

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

340

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

341

class LogLink(object):
    """Information needed to record a link in the logs.

    Depending on context and the information provided at
    construction time, the link may point either to log files for
    a job, or to a bug filed for a failure in the job.

    @var anchor The link text.
    @var url The link url.
    @var reason The reason of failure passed at construction, or None.
    @var retry_count How many times the test has been retried.
    @var testname The test name passed at construction, or None.
    @var bug_id Id of a bug to link to, or None.
    @var bug_count Report count that came with the bug id, or None.
    """

    # Both values are read from the global config when the class is defined.
    _BUG_URL_PREFIX = CONFIG.get_config_value('BUG_REPORTING',
                                              'tracker_url')
    _URL_PATTERN = CONFIG.get_config_value('CROS',
                                           'log_url_pattern', type=str)

    # A list of tests that don't get retried so skip the dashboard.
    _SKIP_RETRY_DASHBOARD = ['provision']

    _BUG_LINK_PREFIX = 'Auto-Bug'
    _LOG_LINK_PREFIX = 'Test-Logs'


    @classmethod
    def get_bug_link(cls, bug_id):
        """Generate a bug link for the given bug_id.

        @param bug_id: The id of the bug.
        @return: The configured tracker url prefix followed by bug_id,
                 eg: https://crbug.com/<bug_id>.
        """
        return '%s%s' % (cls._BUG_URL_PREFIX, bug_id)


    def __init__(self, anchor, server, job_string, bug_info=None, reason=None,
                 retry_count=0, testname=None):
        """Initialize the LogLink by generating the log URL.

        @param anchor The link text.
        @param server The hostname of the server this suite ran on.
        @param job_string The job whose logs we'd like to link to.
        @param bug_info Info about the bug, if one was filed. Unpacked as
                        a (bug_id, bug_count) pair.
        @param reason A string representing the reason of failure if any.
        @param retry_count How many times the test has been retried.
        @param testname Optional Arg that supplies the testname.
        """
        self.anchor = anchor
        self.url = self._URL_PATTERN % (server, job_string)
        self.reason = reason
        self.retry_count = retry_count
        self.testname = testname
        # A falsy bug_info means no bug was filed.
        self.bug_id, self.bug_count = bug_info if bug_info else (None, None)


    def GenerateBuildbotLinks(self):
        """Generate links formatted to meet buildbot expectations.

        If there is a bug associated with this link, report a link to the bug
        and a link to the job logs;
        otherwise report a link to the job logs.

        @return A list of links formatted for the buildbot log annotator.
                The bug link, when present, comes before the log link.
        """
        # Notes that appear in both anchors: the retry note goes first and
        # the failure reason goes last.
        retry_notes = []
        if self.retry_count > 0:
            retry_notes.append('retry_count: %d' % self.retry_count)
        reason_notes = [self.reason.strip()] if self.reason else []

        links = []

        # Add the bug link.
        if self.bug_id:
            bug_url = self.get_bug_link(self.bug_id)
            if self.bug_count is None:
                report_note = 'unknown number of reports'
            elif self.bug_count == 1:
                report_note = 'new report'
            else:
                report_note = '%s reports' % self.bug_count
            bug_notes = retry_notes + [report_note] + reason_notes
            bug_anchor_text = self.get_anchor_text(self._BUG_LINK_PREFIX,
                                                   bug_notes)
            links.append(GetBuildbotStepLink(bug_anchor_text, bug_url))

        # Add the log link.
        log_anchor_text = self.get_anchor_text(self._LOG_LINK_PREFIX,
                                               retry_notes + reason_notes)
        links.append(GetBuildbotStepLink(log_anchor_text, self.url))

        return links


    def get_anchor_text(self, prefix, info_strings):
        """Generate the anchor_text given the prefix and info.

        @param prefix The prefix of the anchor text.
        @param info_strings The infos presented in the anchor text.
        @return '[<prefix>]: <anchor>', followed by ': ' and the
                comma-separated infos when info_strings is not empty.
        """
        stripped_anchor = self.anchor.strip()
        if not info_strings:
            return '[%(prefix)s]: %(anchor)s' % {
                    'prefix': prefix, 'anchor': stripped_anchor}
        return '[%(prefix)s]: %(anchor)s: %(info)s' % {
                'prefix': prefix, 'anchor': stripped_anchor,
                'info': ', '.join(info_strings)}


    def GenerateTextLink(self):
        """Generate a link to the job's logs, for consumption by a human.

        @return The anchor text immediately followed by the log url.
        """
        return '%s%s' % (self.anchor, self.url)


    def GenerateWmatrixRetryLink(self):
        """Generate a link to the wmatrix retry dashboard.

        @return A link formatted for the buildbot log annotator, or None
                when there is no testname or the test is listed in
                _SKIP_RETRY_DASHBOARD.
        """
        if not self.testname or self.testname in self._SKIP_RETRY_DASHBOARD:
            return None

        return GetBuildbotStepLink('[Flake-Dashboard]: %s' % self.testname,
                                   WMATRIX_RETRY_URL % self.testname)

Simran Basi

2015-02-03 15:50:18 -0800

[diff] [blame]

485

486

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

487

class Timings(object):

488

"""Timings for important events during a suite.

489

490

All timestamps are datetime.datetime objects.

491

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

492

@var suite_job_id: the afe job id of the suite job for which

493

we are recording the timing for.

494

@var download_start_time: the time the devserver starts staging

495

the build artifacts. Recorded in create_suite_job.

496

@var payload_end_time: the time when the artifacts only necessary to start

497

installing images onto DUTs are staged.

498

Recorded in create_suite_job.

499

@var artifact_end_time: the remaining artifacts are downloaded after we kick

500

off the reimaging job, at which point we record

501

artifact_end_time. Recorded in dynamic_suite.py.

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

502

@var suite_start_time: the time the suite started.

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

503

@var tests_start_time: the time the first test started running.

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

504

@var tests_end_time: the time the last test finished running.

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

505

"""

beeps

2013-03-22 13:15:49 -0700

[diff] [blame]

506

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

507

def __init__(self, suite_job_id):

508

self.suite_job_id = suite_job_id

509

# Timings related to staging artifacts on devserver.

510

self.download_start_time = None

511

self.payload_end_time = None

512

self.artifact_end_time = None

beeps

2013-03-22 13:15:49 -0700

[diff] [blame]

513

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

514

# The test_start_time, but taken off the view that corresponds to the

515

# suite instead of an individual test.

516

self.suite_start_time = None

beeps

2013-03-22 13:15:49 -0700

[diff] [blame]

517

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

518

# Earliest and Latest tests in the set of TestViews passed to us.

519

self.tests_start_time = None

520

self.tests_end_time = None

521

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

522

Chris Masone

d9f13c5

2012-08-29 10:37:08 -0700

[diff] [blame]

523

def RecordTiming(self, view):

524

"""Given a test report view, extract and record pertinent time info.

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

525

526

get_detailed_test_views() returns a list of entries that provide

527

info about the various parts of a suite run. This method can take

528

any one of these entries and look up timestamp info we might want

529

and record it.

530

Chris Masone

2012-09-06 16:00:07 -0700

[diff] [blame]

531

If timestamps are unavailable, datetime.datetime.min/max will be used.

532

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

533

@param view: A TestView object.

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

534

"""

Chris Masone

2012-09-06 16:00:07 -0700

[diff] [blame]

535

start_candidate = datetime.min

536

end_candidate = datetime.max

537

if view['test_started_time']:

Dan Shi

2014-08-10 23:38:40 -0700

[diff] [blame]

538

start_candidate = time_utils.time_string_to_datetime(

539

view['test_started_time'])

Chris Masone

2012-09-06 16:00:07 -0700

[diff] [blame]

540

if view['test_finished_time']:

Dan Shi

2014-08-10 23:38:40 -0700

[diff] [blame]

541

end_candidate = time_utils.time_string_to_datetime(

542

view['test_finished_time'])

Chris Masone

2012-09-06 16:00:07 -0700

[diff] [blame]

543

Shuqian Zhao

2016-02-24 11:27:26 -0800

[diff] [blame]

544

if view.get_testname() == TestView.SUITE_JOB:

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

545

self.suite_start_time = start_candidate

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

546

else:

547

self._UpdateFirstTestStartTime(start_candidate)

548

self._UpdateLastTestEndTime(end_candidate)

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

549

if view['afe_job_id'] == self.suite_job_id and 'job_keyvals' in view:

Chris Masone

d9f13c5

2012-08-29 10:37:08 -0700

[diff] [blame]

550

keyvals = view['job_keyvals']

Dan Shi

2014-08-10 23:38:40 -0700

[diff] [blame]

551

self.download_start_time = time_utils.time_string_to_datetime(

552

keyvals.get(constants.DOWNLOAD_STARTED_TIME),

553

handle_type_error=True)

beeps

2013-03-22 13:15:49 -0700

[diff] [blame]

554

Dan Shi

2014-08-10 23:38:40 -0700

[diff] [blame]

555

self.payload_end_time = time_utils.time_string_to_datetime(

556

keyvals.get(constants.PAYLOAD_FINISHED_TIME),

557

handle_type_error=True)

beeps

2013-03-22 13:15:49 -0700

[diff] [blame]

558

Dan Shi

2014-08-10 23:38:40 -0700

[diff] [blame]

559

self.artifact_end_time = time_utils.time_string_to_datetime(

560

keyvals.get(constants.ARTIFACT_FINISHED_TIME),

561

handle_type_error=True)

Chris Masone

44e4d6c

2012-08-15 14:25:53 -0700

[diff] [blame]

562

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

563

564

def _UpdateFirstTestStartTime(self, candidate):

565

"""Update self.tests_start_time, iff candidate is an earlier time.

566

567

@param candidate: a datetime.datetime object.

568

"""

569

if not self.tests_start_time or candidate < self.tests_start_time:

570

self.tests_start_time = candidate

571

572

573

def _UpdateLastTestEndTime(self, candidate):

574

"""Update self.tests_end_time, iff candidate is a later time.

575

576

@param candidate: a datetime.datetime object.

577

"""

578

if not self.tests_end_time or candidate > self.tests_end_time:

579

self.tests_end_time = candidate

def __str__(self):

return ('\n'

'Suite timings:\n'

Chris Masone

2012-05-01 16:52:31 -0700

[diff] [blame]

585

'Downloads started at %s\n'

586

'Payload downloads ended at %s\n'

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

587

'Suite started at %s\n'

Chris Masone

2012-05-01 16:52:31 -0700

[diff] [blame]

588

'Artifact downloads ended (at latest) at %s\n'

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

589

'Testing started at %s\n'

Chris Masone

2012-05-01 16:52:31 -0700

[diff] [blame]

590

'Testing ended at %s\n' % (self.download_start_time,

591

self.payload_end_time,

592

self.suite_start_time,

Chris Masone

2012-05-01 16:52:31 -0700

[diff] [blame]

593

self.artifact_end_time,

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

594

self.tests_start_time,

595

self.tests_end_time))

596

597

beeps

2013-03-22 13:15:49 -0700

[diff] [blame]

598

def SendResultsToStatsd(self, suite, build, board):

599

"""

600

Sends data to statsd.

601

602

1. Makes a data_key of the form: run_suite.$board.$branch.$suite

603

eg: stats/gauges/<hostname>/run_suite/<board>/<branch>/<suite>/

604

2. Computes timings for several start and end event pairs.

Alex Miller

9a1987a

2013-08-21 15:51:16 -0700

[diff] [blame]

605

3. Sends all timing values to statsd.

beeps

2013-03-22 13:15:49 -0700

[diff] [blame]

606

607

@param suite: scheduled suite that we want to record the results of.

608

@param build: the build that this suite ran on.

609

eg: 'lumpy-release/R26-3570.0.0'

610

@param board: the board that this suite ran on.

611

"""

612

if sys.version_info < (2, 7):

613

logging.error('Sending run_suite perf data to statsd requires'

614

'python 2.7 or greater.')

615

return

616

MK Ryu

c9c0c3f

2014-10-27 14:36:01 -0700

[diff] [blame]

617

# Constructs the key used for logging statsd timing data.

618

data_key = utils.get_data_key('run_suite', suite, build, board)

beeps

2013-03-22 13:15:49 -0700

[diff] [blame]

619

620

# Since we don't want to try subtracting corrupted datetime values

Dan Shi

2014-08-10 23:38:40 -0700

[diff] [blame]

621

# we catch TypeErrors in time_utils.time_string_to_datetime and insert

622

# None instead. This means that even if, say,

623

# keyvals.get(constants.ARTIFACT_FINISHED_TIME) returns a corrupt

624

# value the member artifact_end_time is set to None.

beeps

2013-03-22 13:15:49 -0700

[diff] [blame]

625

if self.download_start_time:

626

if self.payload_end_time:

Gabe Black

2015-02-04 23:55:15 -0800

[diff] [blame]

627

autotest_stats.Timer(data_key).send('payload_download_time',

628

(self.payload_end_time -

629

self.download_start_time).total_seconds())

beeps

2013-03-22 13:15:49 -0700

[diff] [blame]

630

631

if self.artifact_end_time:

Gabe Black

2015-02-04 23:55:15 -0800

[diff] [blame]

632

autotest_stats.Timer(data_key).send('artifact_download_time',

633

(self.artifact_end_time -

634

self.download_start_time).total_seconds())

beeps

2013-03-22 13:15:49 -0700

[diff] [blame]

635

636

if self.tests_end_time:

637

if self.suite_start_time:

Gabe Black

2015-02-04 23:55:15 -0800

[diff] [blame]

638

autotest_stats.Timer(data_key).send('suite_run_time',

639

(self.tests_end_time -

640

self.suite_start_time).total_seconds())

beeps

2013-03-22 13:15:49 -0700

[diff] [blame]

641

642

if self.tests_start_time:

Gabe Black

2015-02-04 23:55:15 -0800

[diff] [blame]

643

autotest_stats.Timer(data_key).send('tests_run_time',

644

(self.tests_end_time -

645

self.tests_start_time).total_seconds())

beeps

2013-03-22 13:15:49 -0700

[diff] [blame]

646

beeps

2013-03-22 13:15:49 -0700

[diff] [blame]

647

Alex Miller

2013-10-30 15:18:57 -0700

[diff] [blame]

648

# Hostname read from the SERVER section of the config; instance_for_pool()
# passes it as the default when looking up a pool-specific instance.
_DEFAULT_AUTOTEST_INSTANCE = CONFIG.get_config_value(
    'SERVER',
    'hostname',
    type=str,
)

650

651

652

def instance_for_pool(pool_name):
    """
    Look up the hostname of the server that should service a suite for the
    specified pool.

    @param pool_name: The pool (without the 'pool:' prefix) to schedule the
                      suite against.
    @return: The POOL_INSTANCE_SHARDING config value for pool_name, with
             _DEFAULT_AUTOTEST_INSTANCE supplied as the default.
    """
    sharding_section = 'POOL_INSTANCE_SHARDING'
    instance = CONFIG.get_config_value(
            sharding_section, pool_name,
            default=_DEFAULT_AUTOTEST_INSTANCE)
    return instance

663

664

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

665

class TestView(object):
    """Represents a test view and provides a set of helper functions."""


    SUITE_JOB = 'Suite job'
    INFRA_TESTS = ['provision']

    # test_name prefixes of the per-job pseudo entries (not actual tests).
    _JOB_VIEW_PREFIXES = ('SERVER_JOB', 'CLIENT_JOB')


    def __init__(self, view, afe_job, suite_name, build, user,
                 solo_test_run=False):
        """Init a TestView object representing a tko test view.

        @param view: A dictionary representing a tko test view.
        @param afe_job: An instance of frontend.afe.models.Job
                        representing the job that kicked off the test.
        @param suite_name: The name of the suite
                           that the test belongs to.
        @param build: The build for which the test is run.
        @param user: The user for which the test is run.
        @param solo_test_run: This is a solo test run not part of a suite.
        """
        self.view = view
        self.afe_job = afe_job
        self.suite_name = suite_name
        self.build = build
        self.is_suite_view = afe_job.parent_job is None and not solo_test_run
        # This is the test name that will be shown in the output.
        self.testname = None
        self.user = user

        # The case that a job was aborted before it got a chance to run
        # usually indicates suite has timed out (unless aborted by user).
        # In this case, the abort reason will be None.
        # Update the reason with proper information.
        if (self.is_relevant_suite_view() and
                self.get_testname() != self.SUITE_JOB and
                self.view['status'] == 'ABORT' and
                not self.view['reason']):
            self.view['reason'] = 'Timed out, did not run.'


    def __getitem__(self, key):
        """Overload __getitem__ so that we can still use []

        @param key: A key of the tko test view.

        @returns: The value of an attribute in the view.

        """
        return self.view[key]


    def __iter__(self):
        """Overload __iter__ so that it supports 'in' operator."""
        return iter(self.view)


    def _get_raw_testname(self):
        """Return the view's test name before build/suite normalization.

        For SERVER_JOB and CLIENT_JOB entries the job name is prepended so
        that the entries of different jobs can be told apart.

        @returns: '<job_name>_<test_name>' for SERVER_JOB/CLIENT_JOB
                  entries, view['test_name'] otherwise.
        """
        test_name = self.view['test_name']
        if test_name.startswith(self._JOB_VIEW_PREFIXES):
            return '%s_%s' % (self.view['job_name'], test_name)
        return test_name


    def get_testname(self):
        """Get test name that should be shown in the output.

        Formalize the test_name we got from the test view.

        Remove 'build/suite' prefix if any. And prepend 'experimental' prefix
        for experimental tests if their names do not start with
        'experimental'.

        If one runs a test in control file via the following code,
            job.runtest('my_Test', tag='tag')
        for most of the cases, view['test_name'] would look like
        'my_Test.tag'. If this is the case, this method will just return the
        original test name, i.e. 'my_Test.tag'.

        There are four special cases.
        1) A test view is for the suite job's SERVER_JOB.
           In this case, this method will return 'Suite job'.

        2) A test view is of a child job or a solo test run not part of a
           suite, and for a SERVER_JOB or CLIENT_JOB.
           In this case, we will take the job name, remove the build/suite
           prefix from the job name, and append the rest to 'SERVER_JOB'
           or 'CLIENT_JOB' as a prefix. So the names returned by this
           method will look like:
             'experimental_Telemetry Smoothness Measurement_SERVER_JOB'
             'experimental_dummy_Pass_SERVER_JOB'
             'dummy_Fail_SERVER_JOB'

        3) A test view is of a suite job and its status is ABORT.
           In this case, the view['test_name'] is the child job's name.
           If it is an experimental test, 'experimental' will be part
           of the name. For instance,
             'lumpy-release/R35-5712.0.0/perf_v2/
                   experimental_Telemetry Smoothness Measurement'
             'lumpy-release/R35-5712.0.0/dummy/experimental_dummy_Pass'
             'lumpy-release/R35-5712.0.0/dummy/dummy_Fail'
           The above names will be converted to the following:
             'experimental_Telemetry Smoothness Measurement'
             'experimental_dummy_Pass'
             'dummy_Fail'

        4) A test view is of a suite job and its status is TEST_NA.
           In this case, the view['test_name'] is the NAME field of the
           control file. If it is an experimental test, 'experimental' will
           be part of the name. For instance,
             'experimental_Telemetry Smoothness Measurement'
             'experimental_dummy_Pass'
             'dummy_Fail'
           This method will not modify these names.

        @returns: Test name after normalization.

        """
        # The normalized name is computed once and cached.
        if self.testname is not None:
            return self.testname

        if (self.is_suite_view and
                self.view['test_name'].startswith('SERVER_JOB')):
            # Rename suite job's SERVER_JOB to 'Suite job'.
            self.testname = self.SUITE_JOB
            return self.testname

        testname = self._get_raw_testname()
        experimental = self.is_experimental()
        # Remove the build and suite name from testname if any.
        testname = tools.get_test_name(
                self.build, self.suite_name, testname)
        # If an experimental test was aborted, testname
        # would include the 'experimental' prefix already.
        prefix = constants.EXPERIMENTAL_PREFIX if (
                experimental and not
                testname.startswith(constants.EXPERIMENTAL_PREFIX)) else ''
        self.testname = prefix + testname
        return self.testname


    def is_relevant_suite_view(self):
        """Checks whether this is a suite view we should care about.

        @returns: True if it is relevant. False otherwise.
        """
        return (self.get_testname() == self.SUITE_JOB or
                (self.is_suite_view and
                    not self.view['test_name'].startswith('CLIENT_JOB') and
                    not self.view['subdir']))


    def is_test(self):
        """Return whether the view is for an actual test.

        @returns True if the view is for an actual test.
                 False if the view is for SERVER_JOB or CLIENT_JOB.

        """
        return not self.view['test_name'].startswith(self._JOB_VIEW_PREFIXES)


    def is_retry(self):
        """Check whether the view is for a retry.

        @returns: True, if the view is for a retry; False otherwise.

        """
        return self.view['job_keyvals'].get('retry_original_job_id') is not None


    def is_experimental(self):
        """Check whether a test view is for an experimental test.

        @returns: True if it is for an experimental test, False otherwise.

        """
        return (self.view['job_keyvals'].get('experimental') == 'True' or
                tools.get_test_name(self.build, self.suite_name,
                        self.view['test_name']).startswith('experimental'))


    def hit_timeout(self):
        """Check whether the corresponding job has hit its own timeout.

        Note this method should not be called for those test views
        that belongs to a suite job and are determined as irrelevant
        by is_relevant_suite_view.  This is because they are associated
        to the suite job, whose job start/finished time make no sense
        to an irrelevant test view.

        @returns: True if the corresponding afe job has hit timeout.
                  False otherwise.
        """
        if (self.is_relevant_suite_view() and
                self.get_testname() != self.SUITE_JOB):
            # Any relevant suite test view except SUITE_JOB
            # did not hit its own timeout because it was not ever run.
            return False
        start = (datetime.strptime(
                self.view['job_started_time'], time_utils.TIME_FMT)
                if self.view['job_started_time'] else None)
        end = (datetime.strptime(
                self.view['job_finished_time'], time_utils.TIME_FMT)
                if self.view['job_finished_time'] else None)
        # Without both timestamps the runtime cannot be computed.
        if not start or not end:
            return False
        else:
            return ((end - start).total_seconds()/60.0
                        > self.afe_job.max_runtime_mins)


    def is_aborted(self):
        """Check if the view was aborted.

        For suite job and child job test views, we check job keyval
        'aborted_by' and test status.

        For relevant suite job test views, we only check test status
        because the suite job keyval won't make sense to individual
        test views.

        @returns: True if the test was aborted, False otherwise.

        """

        if (self.is_relevant_suite_view() and
                self.get_testname() != self.SUITE_JOB):
            return self.view['status'] == 'ABORT'
        else:
            return (bool(self.view['job_keyvals'].get('aborted_by')) and
                    self.view['status'] in ['ABORT', 'RUNNING'])


    def is_in_fail_status(self):
        """Check if the given test's status corresponds to a failure.

        @returns: True if the test's status is FAIL, ERROR or ABORT.
                  False otherwise.

        """
        # All the statuses tests can have when they fail.
        return self.view['status'] in ['FAIL', 'ERROR', 'ABORT']


    def is_infra_test(self):
        """Check whether this is a test that only lab infra is concerned.

        @returns: True if only lab infra is concerned, False otherwise.

        """
        return self.get_testname() in self.INFRA_TESTS


    def get_buildbot_link_reason(self):
        """Generate the buildbot link reason for the test.

        @returns: A string representing the reason.

        """
        return ('%s: %s' % (self.view['status'], self.view['reason'])
                if self.view['reason'] else self.view['status'])


    def get_job_id_owner_str(self):
        """Generate the job_id_owner string for a test.

        @returns: A string which looks like 135036-username

        """
        return '%s-%s' % (self.view['afe_job_id'], self.user)


    def get_bug_info(self, suite_job_keyvals):
        """Get the bug info from suite_job_keyvals.

        If a bug has been filed for the test, its bug info (bug id and counts)
        will be stored in the suite job's keyvals. This method attempts to
        retrieve bug info of the test from |suite_job_keyvals|. It will return
        None if no bug info is found. No need to check bug info if the view is
        SUITE_JOB.

        @param suite_job_keyvals: The job keyval dictionary of the suite job.
                All the bug info about child jobs are stored in
                suite job's keyvals.

        @returns: None if there is no bug info, or a pair with the
                  id of the bug, and the count of the number of
                  times the bug has been seen.

        """
        if self.get_testname() == self.SUITE_JOB:
            return None
        # Bug info is looked up under the un-normalized test name.
        testname = self._get_raw_testname()

        return tools.get_test_failure_bug_info(
                suite_job_keyvals, self.view['afe_job_id'],
                testname)


    def should_display_buildbot_link(self):
        """Check whether a buildbot link should show for this view.

        For suite job view, show buildbot link if it fails.
        For normal test view,
            show buildbot link if it is a retry
            show buildbot link if it hits its own timeout.
            show buildbot link if it fails. This doesn't
            include the case where it was aborted but has
            not hit its own timeout (most likely it was aborted because
            suite has timed out).

        @returns: True if we should show the buildbot link.
                  False otherwise.
        """
        is_bad_status = (self.view['status'] != 'GOOD' and
                         self.view['status'] != 'TEST_NA')
        if self.get_testname() == self.SUITE_JOB:
            return is_bad_status
        if self.is_retry():
            return True
        if is_bad_status:
            return not self.is_aborted() or self.hit_timeout()
        # A passing, non-retried test: explicitly False rather than an
        # implicit None, as the documented return type is boolean.
        return False


    def get_control_file_attributes(self):
        """Get the attributes from the control file of the test.

        @returns: A list of test attribute or None.
        """
        control_file = self.afe_job.control_file
        attributes = None
        if control_file:
            cd = control_data.parse_control_string(control_file)
            attributes = list(cd.attributes)
        return attributes

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1006

class ResultCollector(object):

Simran Basi

2015-10-14 19:05:00 -0700

[diff] [blame]

1007

"""Collect test results of a suite or a single test run.

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1008

1009

Once a suite job has finished, use this class to collect test results.

1010

`run` is the core method that is to be called first. Then the caller

1011

could retrieve information like return code, return message, is_aborted,

1012

and timings by accessing the collector's public attributes. And output

1013

the test results and links by calling the 'output_*' methods.

1014

1015

Here is an overview of what the `run` method does.

1016

1017

1) Collect the suite job's results from tko_test_view_2.

1018

For the suite job, we only pull test views without a 'subdir'.

1019

A NULL subdir indicates that the test was _not_ executed. This could be

1020

that no child job was scheduled for this test or the child job got

1021

aborted before it starts running.

1022

(Note 'SERVER_JOB'/'CLIENT_JOB' are handled specially)

1023

1024

2) Collect the child jobs' results from tko_test_view_2.

1025

For child jobs, we pull all the test views associated with them.

1026

(Note 'SERVER_JOB'/'CLIENT_JOB' are handled specially)

1027

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

1028

3) Generate web and buildbot links.

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1029

4) Compute timings of the suite run.

1030

5) Compute the return code based on test results.

1031

1032

@var _instance_server: The hostname of the server that is used

1033

to service the suite.

1034

@var _afe: The afe rpc client.

1035

@var _tko: The tko rpc client.

1036

@var _build: The build for which the suite is run,

1037

e.g. 'lumpy-release/R35-5712.0.0'

MK Ryu

2014-10-21 11:58:09 -0700

[diff] [blame]

1038

@var _board: The target board for which the suite is run,

1039

e.g., 'lumpy', 'link'.

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1040

@var _suite_name: The suite name, e.g. 'bvt', 'dummy'.

1041

@var _suite_job_id: The job id of the suite for which we are going to

1042

collect results.

Shuqian Zhao

2015-09-29 14:19:28 -0700

[diff] [blame]

1043

@var _original_suite_name: The suite name we record timing would be

1044

different from _suite_name when running

1045

suite_attr_wrapper.

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

1046

@var _suite_views: A list of TestView objects, representing relevant

1047

test views of the suite job.

1048

@var _child_views: A list of TestView objects, representing test views

1049

of the child jobs.

1050

@var _test_views: A list of TestView objects, representing all test views

1051

from _suite_views and _child_views.

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1052

@var _web_links: A list of web links pointing to the results of jobs.

1053

@var _buildbot_links: A list of buildbot links for non-passing tests.

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

1054

@var _max_testname_width: Max width of all test names.

Simran Basi

2015-10-14 19:05:00 -0700

[diff] [blame]

1055

@var _solo_test_run: True if this is a single test run.

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1056

@var return_code: The exit code that should be returned by run_suite.

1057

@var return_message: Any message that should be displayed to explain

1058

the return code.

1059

@var is_aborted: Whether the suite was aborted or not.

1060

True, False or None (aborting status is unknown yet)

1061

@var timings: A Timings object that records the suite's timings.

"""

MK Ryu

2014-10-21 11:58:09 -0700

[diff] [blame]

1066

def __init__(self, instance_server, afe, tko, build, board,

Simran Basi

2015-10-12 15:36:45 -0700

[diff] [blame]

1067

suite_name, suite_job_id, original_suite_name=None,

Simran Basi

2015-10-14 19:05:00 -0700

[diff] [blame]

1068

user=None, solo_test_run=False):

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1069

self._instance_server = instance_server

1070

self._afe = afe

1071

self._tko = tko

1072

self._build = build

MK Ryu

2014-10-21 11:58:09 -0700

[diff] [blame]

1073

self._board = board

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1074

self._suite_name = suite_name

1075

self._suite_job_id = suite_job_id

Shuqian Zhao

2015-09-29 14:19:28 -0700

[diff] [blame]

1076

self._original_suite_name = original_suite_name or suite_name

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1077

self._suite_views = []

1078

self._child_views = []

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1079

self._test_views = []

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

1080

self._retry_counts = {}

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1081

self._web_links = []

1082

self._buildbot_links = []

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1083

self._max_testname_width = 0

MK Ryu

2014-10-21 11:58:09 -0700

[diff] [blame]

1084

self._num_child_jobs = 0

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1085

self.return_code = None

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1086

self.return_message = ''

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1087

self.is_aborted = None

1088

self.timings = None

Simran Basi

2015-10-12 15:36:45 -0700

[diff] [blame]

1089

self._user = user or getpass.getuser()

Simran Basi

2015-10-14 19:05:00 -0700

[diff] [blame]

1090

self._solo_test_run = solo_test_run

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1091

1092

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1093

def _fetch_relevant_test_views_of_suite(self):
    """Fetch relevant test views of the suite job.

    For the suite job, there will be a test view for SERVER_JOB, and views
    for results of its child jobs. For example, assume we've created
    a suite job (afe_job_id: 40) that runs dummy_Pass, dummy_Fail,
    dummy_Pass.bluetooth. Assume dummy_Pass was aborted before running
    while dummy_Pass.bluetooth got TEST_NA as no duts have bluetooth.
    So the suite job's test views would look like
    _____________________________________________________________________
    10 | 1000 |SERVER_JOB          |----        |40 |GOOD
    11 | 1000 |dummy_Pass          |NULL        |40 |ABORT
    12 | 1000 |dummy_Fail.Fail     |41-owner/...|40 |FAIL
    13 | 1000 |dummy_Fail.Error    |42-owner/...|40 |ERROR
    14 | 1000 |dummy_Pass.bluetooth|NULL        |40 |TEST_NA

    For a suite job, we only care about
    a) The test view for the suite job's SERVER_JOB
    b) The test views for real tests without a subdir. A NULL subdir
       indicates that a test didn't get executed.
    So, for the above example, we only keep test views whose test_idxs
    are 10, 11, 14.

    @returns: A list of TestView objects, representing relevant
              test views of the suite job.

    """
    suite_job = self._afe.get_jobs(id=self._suite_job_id)[0]
    raw_views = self._tko.run(call='get_detailed_test_views',
                              afe_job_id=self._suite_job_id)
    # Lazily wrap each raw view so wrapping and filtering stay interleaved
    # one view at a time.
    wrapped_views = (
            TestView(raw, suite_job, self._suite_name, self._build,
                     self._user, solo_test_run=self._solo_test_run)
            for raw in raw_views)
    return [tv for tv in wrapped_views if tv.is_relevant_suite_view()]

1131

1132

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

1133

def _compute_retry_count(self, view):

1134

"""Return how many times the test has been retried.

1135

1136

@param view: A TestView instance.

1137

@returns: An int value indicating the retry count.

1138

1139

"""

1140

old_job = view['job_keyvals'].get('retry_original_job_id')

count = 0

while old_job:

count += 1

views = self._tko.run(

1145

call='get_detailed_test_views', afe_job_id=old_job)

1146

old_job = (views[0]['job_keyvals'].get('retry_original_job_id')

if views else None)

return count

Simran Basi

2015-10-14 19:05:00 -0700

[diff] [blame]

1151

def _fetch_test_views_of_child_jobs(self, jobs=None):
    """Fetch test views of child jobs.

    Also records the number of child jobs in self._num_child_jobs when
    there is at least one.

    @param jobs: Optional list of job objects to collect views from.
                 When empty or None, the children of the suite job are
                 looked up through the AFE.

    @returns: A tuple (child_views, retry_counts)
              child_views is list of TestView objects, representing
              all valid views. retry_counts is a dictionary that maps
              test_idx to retry counts. It only stores retry
              counts that are greater than 0.

    """
    kept_views = []
    retries_by_test_idx = {}
    child_jobs = jobs or self._afe.get_jobs(
            parent_job_id=self._suite_job_id)
    if child_jobs:
        self._num_child_jobs = len(child_jobs)
    for child in child_jobs:
        raw_views = self._tko.run(
                call='get_detailed_test_views', afe_job_id=child.id,
                invalid=0)
        job_views = [
                TestView(raw, child, self._suite_name, self._build,
                         self._user)
                for raw in raw_views]
        has_failed_test = any(
                tv.is_test() and tv['status'] != 'GOOD' for tv in job_views)
        for tv in job_views:
            # Normal test views are always kept. SERVER_JOB/CLIENT_JOB
            # views are only kept when they failed and no real test in
            # the same job failed.
            if not tv.is_test() and (
                    tv['status'] == 'GOOD' or has_failed_test):
                continue
            kept_views.append(tv)
            retries = self._compute_retry_count(tv)
            if retries > 0:
                retries_by_test_idx[tv['test_idx']] = retries
    return kept_views, retries_by_test_idx

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1184

1185

1186

def _generate_web_and_buildbot_links(self):
    """Build the log links for every test view.

    Resets and fills self._web_links (one LogLink per test view) and
    self._buildbot_links (the subset whose view asks for a buildbot
    link; those links additionally get a reason attached).

    """
    # TODO(fdeng): If a job was aborted before it reaches Running
    # state, we read the test view from the suite job
    # and thus this method generates a link pointing to the
    # suite job's page for the aborted job. Need a fix.
    self._web_links = []
    self._buildbot_links = []
    # Bug info are stored in the suite job's keyvals; a solo test run
    # uses an empty dict instead.
    suite_job_keyvals = (
            {} if self._solo_test_run
            else self._suite_views[0]['job_keyvals'])
    for view in self._test_views:
        retries = self._retry_counts.get(view['test_idx'], 0)
        bug_info = view.get_bug_info(suite_job_keyvals)
        job_string = view.get_job_id_owner_str()
        # Pad the anchor so names line up when printed.
        padded_name = view.get_testname().ljust(self._max_testname_width)
        log_link = LogLink(
                anchor=padded_name,
                server=self._instance_server,
                job_string=job_string,
                bug_info=bug_info, retry_count=retries,
                testname=view.get_testname())
        self._web_links.append(log_link)

        if view.should_display_buildbot_link():
            log_link.reason = view.get_buildbot_link_reason()
            self._buildbot_links.append(log_link)

1215

1216

1217

def _record_timings(self):
    """Create self.timings and feed it every collected test view."""
    suite_timings = Timings(self._suite_job_id)
    self.timings = suite_timings
    for view in self._test_views:
        suite_timings.RecordTiming(view)

1222

1223

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

1224

def _get_return_msg(self, code, tests_passed_after_retry):
    """Map a return code to its human readable explanation.

    @param code: An enum value of RETURN_CODES
    @param tests_passed_after_retry: True/False, indicating
        whether there are test(s) that have passed after retry.
        Only consulted when |code| is RETURN_CODES.WARNING.

    @returns: A string, representing the message. Empty for codes
              that have no dedicated message.

    """
    if code == RETURN_CODES.INFRA_FAILURE:
        return 'Suite job failed or provisioning failed.'
    if code == RETURN_CODES.SUITE_TIMEOUT:
        return ('Some test(s) was aborted before running,'
                ' suite must have timed out.')
    if code == RETURN_CODES.WARNING:
        return ('Some test(s) passed after retry.'
                if tests_passed_after_retry
                else 'Some test(s) raised a warning.')
    if code == RETURN_CODES.ERROR:
        return 'Some test(s) failed.'
    return ''

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1250

def _compute_return_code(self):
    """Compute the exit code based on test results.

    Each non-experimental test view is mapped to a RETURN_CODES value
    and the views are folded together with get_worse_code(). The result
    is stored in self.return_code and the matching explanation in
    self.return_message.

    """
    worst_code = RETURN_CODES.OK
    saw_passing_retry = False

    for view in self._test_views:
        # The order of checking each case is important.
        if view.is_experimental():
            continue
        if view.get_testname() == TestView.SUITE_JOB:
            if view.is_aborted() and view.hit_timeout():
                view_code = RETURN_CODES.SUITE_TIMEOUT
            elif view.is_in_fail_status():
                view_code = RETURN_CODES.INFRA_FAILURE
            elif view['status'] == 'WARN':
                view_code = RETURN_CODES.WARNING
            else:
                view_code = RETURN_CODES.OK
        elif view.is_aborted() and view.is_relevant_suite_view():
            # The test was aborted before it started.
            # This guarantees that the suite has timed out.
            view_code = RETURN_CODES.SUITE_TIMEOUT
        elif view.is_aborted() and not view.hit_timeout():
            # The test was aborted, but
            # not due to a timeout. This is most likely
            # because the suite has timed out, but may
            # also because it was aborted by the user.
            # Since suite timing out is determined by checking
            # the suite job view, we simply ignore this view here.
            view_code = RETURN_CODES.OK
        elif view.is_in_fail_status():
            # The test job failed.
            view_code = (RETURN_CODES.INFRA_FAILURE
                         if view.is_infra_test()
                         else RETURN_CODES.ERROR)
        elif view['status'] == 'WARN':
            # The test/suite job raised a warning.
            view_code = RETURN_CODES.WARNING
        elif view.is_retry():
            # The test is a passing retry.
            view_code = RETURN_CODES.WARNING
            saw_passing_retry = True
        else:
            view_code = RETURN_CODES.OK
        worst_code = get_worse_code(worst_code, view_code)

    self.return_code = worst_code
    self.return_message = self._get_return_msg(
            worst_code, saw_passing_retry)

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1301

1302

1303

def output_results(self):
    """Log test results, suite timings and links to the test logs."""
    # Per-test status lines.
    for view in self._test_views:
        padded_name = view.get_testname().ljust(self._max_testname_width)
        status = view['status']
        logging.info('%s%s', padded_name, get_pretty_status(status))
        if status != 'GOOD':
            logging.info('%s %s: %s', padded_name, status, view['reason'])
        if view.is_retry():
            retries = self._retry_counts.get(view['test_idx'], 0)
            logging.info('%s retry_count: %s', padded_name, retries)
    # Suite timings.
    logging.info(self.timings)
    # Links to test logs.
    logging.info('\nLinks to test logs:')
    for web_link in self._web_links:
        logging.info(web_link.GenerateTextLink())
    logging.info('\n')

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1324

1325

Shuqian Zhao

2015-08-05 22:56:30 -0700

[diff] [blame]

1326

def get_results_dict(self):
    """Write test results, timings and web links into a dict.

    Safe to call even when no timings were recorded (self.timings is
    None, as it is until _record_timings() has run): in that case
    'suite_timings' is an empty dict instead of the call failing.

    @returns: A dict of results in the format like:
              {
              'tests': {
                    'test_1': {'status': 'PASSED', 'attributes': [1,2], ...}
                    'test_2': {'status': 'FAILED', 'attributes': [1],...}
              }
              'suite_timings': {
                    'download_start': '1998-07-17 00:00:00',
                    'payload_download_end': '1998-07-17 00:00:05',
                    ...
              }
              'suite_job_id': <id of the suite job>
              }
    """
    output_dict = {}
    tests_dict = output_dict.setdefault('tests', {})
    for v in self._test_views:
        test_info = tests_dict.setdefault(v.get_testname(), {})
        test_info.update({
            'status': v['status'],
            'attributes': v.get_control_file_attributes() or [],
            'reason': v['reason'],
            'retry_count': self._retry_counts.get(v['test_idx'], 0),
            })

    # Write the links to test logs into the |tests_dict| of |output_dict|.
    # For test whose status is not 'GOOD', the link is also buildbot_link.
    for link in self._web_links:
        # Anchors are padded for display; strip to recover the test name.
        test_info = tests_dict.get(link.anchor.strip())
        if not test_info:
            continue
        test_info['link_to_logs'] = link.url
        # Write the wmatrix link into the dict.
        if link in self._buildbot_links and link.testname:
            test_info['wmatrix_link'] = WMATRIX_RETRY_URL % link.testname
        # Write the bug url into the dict.
        if link.bug_id:
            test_info['bug_url'] = link.get_bug_link(link.bug_id)

    # Write the suite timings into |output_dict|. self.timings stays None
    # when result collection bailed out before recording any timing, so
    # guard against it rather than raising AttributeError.
    time_dict = output_dict.setdefault('suite_timings', {})
    timings = self.timings
    if timings is not None:
        time_dict.update({
            'download_start' : str(timings.download_start_time),
            'payload_download_end' : str(timings.payload_end_time),
            'suite_start' : str(timings.suite_start_time),
            'artifact_download_end' : str(timings.artifact_end_time),
            'tests_start' : str(timings.tests_start_time),
            'tests_end' : str(timings.tests_end_time),
            })

    output_dict['suite_job_id'] = self._suite_job_id
    return output_dict

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1384

def output_buildbot_links(self):
    """Log every buildbot link, each followed by its wmatrix retry link.

    The wmatrix retry link is only logged when the link object
    produces a non-empty one.

    """
    for buildbot_link in self._buildbot_links:
        for rendered in buildbot_link.GenerateBuildbotLinks():
            logging.info(rendered)
        retry_link = buildbot_link.GenerateWmatrixRetryLink()
        if not retry_link:
            continue
        logging.info(retry_link)

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

def run(self):
    """Collect test results.

    This method goes through the following steps:
        Fetch relevant test views of the suite job.
        Fetch test views of child jobs
        Check whether the suite was aborted.
        Generate links.
        Calculate suite timings.
        Compute return code based on the test result.

    For a solo test run the job itself is treated as the only child
    job and no suite views are fetched.

    When no test view is found at all, return_code is set to
    RETURN_CODES.INFRA_FAILURE and the remaining steps are skipped.

    """
    if self._solo_test_run:
        # Store the retry counts under the same attribute that the link
        # generation and result output read (self._retry_counts).
        self._test_views, self._retry_counts = (
                self._fetch_test_views_of_child_jobs(
                        jobs=self._afe.get_jobs(id=self._suite_job_id)))
        # Kept as an alias for any caller that read the old attribute.
        self.retry_count = self._retry_counts
    else:
        self._suite_views = self._fetch_relevant_test_views_of_suite()
        self._child_views, self._retry_counts = (
                self._fetch_test_views_of_child_jobs())
        self._test_views = self._suite_views + self._child_views
    # For hostless job in Starting status, there is no test view associated.
    # This can happen when a suite job in Starting status is aborted. When
    # the scheduler hits some limit, e.g., max_hostless_jobs_per_drone,
    # max_jobs_started_per_cycle, a suite job can stay in Starting status.
    if not self._test_views:
        self.return_code = RETURN_CODES.INFRA_FAILURE
        self.return_message = 'No test view was found.'
        return
    self.is_aborted = any([view['job_keyvals'].get('aborted_by')
                           for view in self._suite_views])
    # Leave a little padding after the longest test name.
    self._max_testname_width = max(
            [len(v.get_testname()) for v in self._test_views]) + 3
    self._generate_web_and_buildbot_links()
    self._record_timings()
    self._compute_return_code()

1430

1431

MK Ryu

2014-10-21 11:58:09 -0700

[diff] [blame]

1432

def gather_timing_stats(self):
    """Report the suite timings and the overall suite runtime.

    Sends the recorded timings to statsd through self.timings and then
    records the suite runtime through job_overhead.

    """
    suite_timings = self.timings
    # Send timings to statsd.
    suite_timings.SendResultsToStatsd(
            self._original_suite_name, self._build, self._board)

    # Record suite runtime in metadata db.
    # Some failure modes can leave times unassigned, report sentinel value
    # in that case.
    tests_end = suite_timings.tests_end_time
    suite_start = suite_timings.suite_start_time
    if tests_end is None or suite_start is None:
        runtime_in_secs = -1
    else:
        runtime_in_secs = (tests_end - suite_start).total_seconds()

    job_overhead.record_suite_runtime(
            self._suite_job_id, self._suite_name, self._board,
            self._build, self._num_child_jobs, runtime_in_secs)

1449

1450

Prashanth B

2014-05-08 18:01:27 -0700

[diff] [blame]

1451

@retry.retry(error.StageControlFileFailure, timeout_min=10)
def create_suite(afe, options):
    """Create a suite with retries.

    The retry behaviour comes from the retry.retry decorator, configured
    for error.StageControlFileFailure with timeout_min=10.

    @param afe: The afe object to insert the new suite job into.
    @param options: The options to use in creating the suite.

    @return: The afe_job_id of the new suite job.
    """
    # Map each provisionable version prefix to the requested build.
    builds = {}
    if options.build:
        os_prefix = (provision.ANDROID_BUILD_VERSION_PREFIX
                     if re.match(ANDROID_BUILD_REGEX, options.build)
                     else provision.CROS_VERSION_PREFIX)
        builds[os_prefix] = options.build
    if options.firmware_rw_build:
        builds[provision.FW_RW_VERSION_PREFIX] = options.firmware_rw_build
    if options.firmware_ro_build:
        builds[provision.FW_RO_VERSION_PREFIX] = options.firmware_ro_build

    # These options arrive as the strings 'True'/'False'.
    wait = options.no_wait == 'False'
    file_bugs = options.file_bugs == 'True'
    job_retry = options.retry == 'True'
    offload_failures_only = options.offload_failures_only == 'True'

    # The priority is either a number or the name of a priority level.
    try:
        priority = int(options.priority)
    except ValueError:
        try:
            priority = priorities.Priority.get_value(options.priority)
        except AttributeError:
            print('Unknown priority level %s. Try one of %s.' % (
                  options.priority, ', '.join(priorities.Priority.names)))
            raise

    logging.info('%s Submitted create_suite_job rpc',
                 diagnosis_utils.JobTimer.format_time(datetime.now()))
    # Adjust timeout based on the delay_minutes setting.
    delay_minutes = options.delay_minutes
    timeout_mins = options.timeout_mins + delay_minutes
    max_runtime_mins = options.max_runtime_mins + delay_minutes
    return afe.run(
            'create_suite_job',
            name=options.name,
            board=options.board,
            build=options.build,
            builds=builds,
            test_source_build=options.test_source_build,
            check_hosts=wait,
            pool=options.pool,
            num=options.num,
            file_bugs=file_bugs,
            priority=priority,
            suite_args=options.suite_args,
            wait_for_results=wait,
            timeout_mins=timeout_mins,
            max_runtime_mins=max_runtime_mins,
            job_retry=job_retry,
            max_retries=options.max_retries,
            suite_min_duts=options.suite_min_duts,
            offload_failures_only=offload_failures_only,
            run_prod_code=options.run_prod_code,
            delay_minutes=options.delay_minutes)

Prashanth B

2014-05-08 18:01:27 -0700

[diff] [blame]

1502

1503

Shuqian Zhao

2015-08-05 22:56:30 -0700

[diff] [blame]

1504

def main_without_exception_handling(options):
    """
    run_suite script without exception handling.

    Sets up logging, creates the suite job (or attaches to the job named
    by options.mock_job_id), optionally waits for it to finish, collects
    the results and works out the return code.

    @param options: The parsed options.

    @returns: A tuple contains the return_code of run_suite and the dictionary
              of the output. The dictionary only holds JSON-serializable
              values added here (the caller dumps it with json.dumps).

    @raises utils.TestLabException: if options.mock_job_id names a job
            that cannot be retrieved.
    @raises diagnosis_utils.NotEnoughDutsError: re-raised from the DUT
            availability check after logging the pool health bug link.

    """
    # If indicate to use the new style suite control file, convert the args
    if options.use_suite_attr:
        options = change_options_for_suite_attr(options)

    log_name = 'run_suite-default.log'
    if options.build:
        # convert build name from containing / to containing only _
        log_name = 'run_suite-%s.log' % options.build.replace('/', '_')
        log_dir = os.path.join(common.autotest_dir, 'logs')
        if os.path.exists(log_dir):
            log_name = os.path.join(log_dir, log_name)

    utils.setup_logging(logfile=log_name)

    if not options.bypass_labstatus:
        utils.check_lab_status(options.build)
    instance_server = (options.web if options.web else
                       instance_for_pool(options.pool))
    afe = frontend_wrappers.RetryingAFE(server=instance_server,
                                        timeout_min=options.afe_timeout_mins,
                                        delay_sec=options.delay_sec)
    logging.info('Autotest instance: %s', instance_server)

    rpc_helper = diagnosis_utils.RPCHelper(afe)
    is_real_time = True
    if options.mock_job_id:
        job_id = int(options.mock_job_id)
        # A job that already finished is replayed rather than followed
        # in real time.
        existing_job = afe.get_jobs(id=job_id, finished=True)
        if existing_job:
            is_real_time = False
        else:
            existing_job = afe.get_jobs(id=job_id)
        if existing_job:
            job_created_on = time_utils.date_string_to_epoch_time(
                    existing_job[0].created_on)
        else:
            raise utils.TestLabException('Failed to retrieve job: %d' % job_id)
    else:
        try:
            rpc_helper.check_dut_availability(options.board, options.pool,
                                              options.minimum_duts,
                                              options.skip_duts_check)
            job_id = create_suite(afe, options)
            job_created_on = time.time()
        except diagnosis_utils.NotEnoughDutsError:
            logging.info(GetBuildbotStepLink(
                    'Pool Health Bug', LogLink.get_bug_link(rpc_helper.bug)))
            raise
        except (error.CrosDynamicSuiteException,
                error.RPCException, proxy.JSONRPCException) as e:
            logging.warning('Error Message: %s', e)
            # Store the message text, not the exception object: the
            # output dict is serialized to JSON by the caller and an
            # exception instance is not JSON serializable.
            return (RETURN_CODES.INFRA_FAILURE, {'return_message': str(e)})
        except AttributeError:
            return (RETURN_CODES.INVALID_OPTIONS, {})

    job_timer = diagnosis_utils.JobTimer(
            job_created_on, float(options.timeout_mins))
    job_url = reporting_utils.link_job(job_id,
                                       instance_server=instance_server)
    logging.info('%s Created suite job: %s',
                 job_timer.format_time(job_timer.job_created_time),
                 job_url)
    # TODO(akeshet): Move this link-printing to chromite.
    logging.info(GetBuildbotStepLink('Link to suite', job_url))

    if options.create_and_return:
        msg = '--create_and_return was specified, terminating now.'
        logging.info(msg)
        return (RETURN_CODES.OK, {'return_message':msg})

    TKO = frontend_wrappers.RetryingTKO(server=instance_server,
                                        timeout_min=options.afe_timeout_mins,
                                        delay_sec=options.delay_sec)
    code = RETURN_CODES.OK
    wait = options.no_wait == 'False'
    output_dict = {}
    if wait:
        while not afe.get_jobs(id=job_id, finished=True):
            # Note that this call logs output, preventing buildbot's
            # 9000 second silent timeout from kicking in. Let there be no
            # doubt, this is a hack. The timeout is from upstream buildbot and
            # this is the easiest work around.
            if job_timer.first_past_halftime():
                rpc_helper.diagnose_job(job_id, instance_server)
            if job_timer.debug_output_timer.poll():
                logging.info('The suite job has another %s till timeout.',
                             job_timer.timeout_hours - job_timer.elapsed_time())
            time.sleep(10)
        # For most cases, ResultCollector should be able to determine whether
        # a suite has timed out by checking information in the test view.
        # However, occasionally tko parser may fail on parsing the
        # job_finished time from the job's keyval file. So we add another
        # layer of timeout check in run_suite. We do the check right after
        # the suite finishes to make it as accurate as possible.
        # There is a minor race condition here where we might have aborted
        # for some reason other than a timeout, and the job_timer thinks
        # it's a timeout because of the jitter in waiting for results.
        # The consequence would be that run_suite exits with code
        # SUITE_TIMEOUT while it should have returned INFRA_FAILURE
        # instead, which should happen very rarely.
        # Note the timeout will have no sense when using -m option.
        is_suite_timeout = job_timer.is_suite_timeout()

        # Extract the original suite name to record timing.
        original_suite_name = get_original_suite_name(options.name,
                                                      options.suite_args)
        # Start collecting test results.
        collector = ResultCollector(instance_server=instance_server,
                                    afe=afe, tko=TKO, build=options.build,
                                    board=options.board,
                                    suite_name=options.name,
                                    suite_job_id=job_id,
                                    original_suite_name=original_suite_name)
        collector.run()
        # Dump test outputs into json.
        output_dict = collector.get_results_dict()
        output_dict['autotest_instance'] = instance_server
        if not options.json_dump:
            collector.output_results()
        code = collector.return_code
        return_message = collector.return_message
        if is_real_time:
            # Do not record stats if the suite was aborted (either by a user
            # or through the golo rpc).
            # Also do not record stats if is_aborted is None, indicating
            # aborting status is unknown yet.
            if collector.is_aborted is False:
                collector.gather_timing_stats()

            if collector.is_aborted is True and is_suite_timeout:
                # There are two possible cases when a suite times out.
                # 1. the suite job was aborted due to timing out
                # 2. the suite job succeeded, but some child jobs
                #    were already aborted before the suite job exited.
                # The case 2 was handled by ResultCollector,
                # here we handle case 1.
                old_code = code
                code = get_worse_code(
                        code, RETURN_CODES.SUITE_TIMEOUT)
                if old_code != code:
                    return_message = 'Suite job timed out.'
                    logging.info('Upgrade return code from %s to %s '
                                 'because suite job has timed out.',
                                 RETURN_CODES.get_string(old_code),
                                 RETURN_CODES.get_string(code))
            if is_suite_timeout:
                logging.info('\nAttempting to diagnose pool: %s', options.pool)
                try:
                    # Add some jitter to make up for any latency in
                    # aborting the suite or checking for results.
                    cutoff = (job_timer.timeout_hours +
                              datetime_base.timedelta(hours=0.3))
                    rpc_helper.diagnose_pool(
                            options.board, options.pool, cutoff)
                except proxy.JSONRPCException:
                    # Diagnosis is best effort; do not fail the run.
                    logging.warning('Unable to diagnose suite abort.')

        # And output return message.
        if return_message:
            logging.info('Reason: %s', return_message)
            output_dict['return_message'] = return_message

        logging.info('\nOutput below this line is for buildbot consumption:')
        collector.output_buildbot_links()
    else:
        logging.info('Created suite job: %r', job_id)
        link = LogLink(options.name, instance_server,
                       '%s-%s' % (job_id, getpass.getuser()))
        for generate_link in link.GenerateBuildbotLinks():
            logging.info(generate_link)
        output_dict['return_message'] = '--no_wait specified; Exiting.'
        logging.info('--no_wait specified; Exiting.')
    return (code, output_dict)

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

1687

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1688

Fang Deng

2014-09-18 17:52:06 -0700

[diff] [blame]

1689

def main():
    """Entry point.

    Verifies the caller is not root, parses and validates the command line,
    and runs the suite via main_without_exception_handling().  Any exception
    raised along the way is converted into a return code plus a
    'return_message' entry in the output dictionary instead of propagating.

    When options.json_dump is set, logging is silenced and the output
    dictionary is written to stdout as JSON wrapped in
    #JSON_START# / #JSON_END# markers.

    @return: The RETURN_CODES value describing the outcome of the run.
    """
    utils.verify_not_root_user()
    code = RETURN_CODES.OK
    output_dict = {}
    # Bind options before the try block: if parse_options() itself raises,
    # the handlers below swallow the error and the reporting code after the
    # try block must still be able to run without hitting an unbound name.
    options = None

    try:
        parser, options, args = parse_options()
        # Silence the log when dumping outputs into json
        if options.json_dump:
            logging.disable(logging.CRITICAL)

        if not verify_options_and_args(options, args):
            parser.print_help()
            code = RETURN_CODES.INVALID_OPTIONS
        else:
            (code, output_dict) = main_without_exception_handling(options)
    except diagnosis_utils.BoardNotAvailableError as e:
        output_dict['return_message'] = 'Skipping testing: %s' % e.message
        code = RETURN_CODES.BOARD_NOT_AVAILABLE
        logging.info(output_dict['return_message'])
    except utils.TestLabException as e:
        output_dict['return_message'] = 'TestLabException: %s' % e
        code = RETURN_CODES.INFRA_FAILURE
        logging.exception(output_dict['return_message'])
    except Exception as e:
        # Top-level boundary: report anything unexpected as an infra failure
        # rather than letting the traceback escape to the caller.
        output_dict['return_message'] = 'Unhandled run_suite exception: %s' % e
        code = RETURN_CODES.INFRA_FAILURE
        logging.exception(output_dict['return_message'])

    # Dump test outputs into json.
    output_dict['return_code'] = code
    output_json = json.dumps(output_dict, sort_keys=True)
    # options is still None when option parsing failed; in that case we do
    # not know whether a JSON dump was requested, so none is written.
    if options is not None and options.json_dump:
        output_json_marked = '#JSON_START#%s#JSON_END#' % output_json.strip()
        sys.stdout.write(output_json_marked)

    logging.info('Will return from run_suite with status: %s',
                 RETURN_CODES.get_string(code))
    autotest_stats.Counter('run_suite.%s' %
                           RETURN_CODES.get_string(code)).increment()
    return code

Fang Deng

2014-09-18 17:52:06 -0700

[diff] [blame]

1731

1732

Chris Masone