Blame - site_utils/run_suite.py - platform/external/autotest

2012-02-14 14:18:01 -0800

[diff] [blame]

#!/usr/bin/python

#

# Use of this source code is governed by a BSD-style license that can be

5

# found in the LICENSE file.

6

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

7

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

8

"""Tool for running suites of tests and waiting for completion.

9

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

10

The desired test suite will be scheduled with autotest. By default,

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

11

this tool will block until the job is complete, printing a summary

12

at the end. Error conditions result in exceptions.

13

14

This is intended for use only with Chrome OS test suites that leverage the

15

dynamic suite infrastructure in server/cros/dynamic_suite.py.

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

16

17

This script exits with one of the following codes:

18

0 - OK: Suite finished successfully

19

1 - ERROR: Test(s) failed, or hits its own timeout

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

20

2 - WARNING: Test(s) raised a warning or passed on retry, none failed/timed out.

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

21

3 - INFRA_FAILURE: Infrastructure related issues, e.g.

22

* Lab is down

23

* Too many duts (defined as a constant) in repair failed status

24

* Suite job issues, like bug in dynamic suite,

25

user aborted the suite, lose a drone/all devservers/rpc server,

26

0 tests ran, etc.

Fang Deng

2014-09-12 14:16:11 -0700

[diff] [blame]

27

* provision failed

28

TODO(fdeng): crbug.com/413918, reexamine treating all provision

29

failures as INFRA failures.

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

30

4 - SUITE_TIMEOUT: Suite timed out, some tests ran,

31

none failed by the time the suite job was aborted. This will cover,

32

but not limited to, the following cases:

33

* A devserver failure that manifests as a timeout

34

* No DUTs available midway through a suite

35

* Provision/Reset/Cleanup took longer time than expected for new image

36

* A regression in scheduler tick time.

Fang Deng

2014-09-25 10:18:48 -0700

[diff] [blame]

37

5 - BOARD_NOT_AVAILABLE: If there is no host for the requested board/pool.

38

6 - INVALID_OPTIONS: If options are not valid.

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

39

"""

40

Allen Li

2016-09-14 14:44:59 -0700

[diff] [blame]

41

import argparse

42

import ast

Allen Li

2016-08-16 14:19:08 -0700

[diff] [blame]

43

from collections import namedtuple

Chris Masone

2012-09-06 16:00:07 -0700

[diff] [blame]

44

from datetime import datetime

Allen Li

2016-09-14 14:44:59 -0700

[diff] [blame]

45

from datetime import timedelta

import getpass

import json

import logging

import os

import re

import sys

import time

Chris Masone

2012-09-06 16:00:07 -0700

[diff] [blame]

53

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

54

import common

Allen Li

2016-09-14 15:19:20 -0700

[diff] [blame]

55

from chromite.lib import buildbot_annotations as annotations

56

Shuqian Zhao

2015-08-05 22:56:30 -0700

[diff] [blame]

57

from autotest_lib.client.common_lib import control_data

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

58

from autotest_lib.client.common_lib import error

J. Richard Barnette

3cbd76b

2013-11-27 12:11:25 -0800

[diff] [blame]

59

from autotest_lib.client.common_lib import global_config, enum

60

from autotest_lib.client.common_lib import priorities

Dan Shi

2014-08-10 23:38:40 -0700

[diff] [blame]

61

from autotest_lib.client.common_lib import time_utils

Prashanth B

2014-05-08 18:01:27 -0700

[diff] [blame]

62

from autotest_lib.client.common_lib.cros import retry

Prashanth B

2014-03-14 12:36:29 -0700

[diff] [blame]

63

from autotest_lib.frontend.afe.json_rpc import proxy

xixuan

2017-06-29 15:40:19 -0700

[diff] [blame]

64

from autotest_lib.server import site_utils

J. Richard Barnette

3cbd76b

2013-11-27 12:11:25 -0800

[diff] [blame]

65

from autotest_lib.server import utils

Dan Shi

2014-10-10 13:38:51 -0700

[diff] [blame]

66

from autotest_lib.server.cros import provision

Chris Masone

44e4d6c

2012-08-15 14:25:53 -0700

[diff] [blame]

67

from autotest_lib.server.cros.dynamic_suite import constants

Chris Masone

b493555

2012-08-14 12:05:54 -0700

[diff] [blame]

68

from autotest_lib.server.cros.dynamic_suite import frontend_wrappers

Allen Li

2016-08-18 12:09:32 -0700

[diff] [blame]

69

from autotest_lib.server.cros.dynamic_suite import reporting

Prashanth B

2014-03-14 12:36:29 -0700

[diff] [blame]

70

from autotest_lib.server.cros.dynamic_suite import reporting_utils

J. Richard Barnette

e7b98bb

2013-08-21 16:34:16 -0700

[diff] [blame]

71

from autotest_lib.server.cros.dynamic_suite import tools

Prashanth B

2014-03-14 12:36:29 -0700

[diff] [blame]

72

from autotest_lib.site_utils import diagnosis_utils

MK Ryu

2014-10-21 11:58:09 -0700

[diff] [blame]

73

from autotest_lib.site_utils import job_overhead

74

Chris Masone

1120cdf

2012-02-27 17:35:07 -0800

[diff] [blame]

75

# Handle on the global autotest configuration; the settings below are read
# from it once, at import time.
CONFIG = global_config.global_config

# Value of the 'hostname' setting in the SERVER config section.
_DEFAULT_AUTOTEST_INSTANCE = CONFIG.get_config_value(
        'SERVER', 'hostname', type=str)
# %-style pattern used to build log URLs; LogLink fills it with
# (server, job_string).
_URL_PATTERN = CONFIG.get_config_value('CROS', 'log_url_pattern', type=str)

# Return code that will be sent back to autotest_rpc_server.py
RETURN_CODES = enum.Enum(
        'OK', 'ERROR', 'WARNING', 'INFRA_FAILURE', 'SUITE_TIMEOUT',
        'BOARD_NOT_AVAILABLE', 'INVALID_OPTIONS')
# The severity of return code. If multiple codes
# apply, the script should always return the severest one.
# E.g. if we have a test failure and the suite also timed out,
# we should return 'ERROR'.
# NOTE(review): BOARD_NOT_AVAILABLE and INVALID_OPTIONS have no entry here,
# so get_worse_code() raises KeyError if it is ever given one of them --
# confirm that callers never compare those two codes.
SEVERITY = {RETURN_CODES.OK: 0,
            RETURN_CODES.WARNING: 1,
            RETURN_CODES.SUITE_TIMEOUT: 2,
            RETURN_CODES.INFRA_FAILURE: 3,
            RETURN_CODES.ERROR: 4}

# Minimum RPC timeout setting for calls expected to take long time, e.g.,
# create_suite_job. If default socket time (socket.getdefaulttimeout()) is
# None or greater than this value, the default will be used.
# The value here is set to be the same as the timeout for the RetryingAFE object
# so long running RPCs can wait long enough before being aborted.
_MIN_RPC_TIMEOUT = 600

# Number of days back to search for existing job.
_SEARCH_JOB_MAX_DAYS = 14

104

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

105

106

def get_worse_code(code1, code2):
    """Pick the more severe of two return codes.

    Severity is looked up in the module-level SEVERITY table. When both
    codes are equally severe, code1 is returned.

    @param code1: An enum value of RETURN_CODES
    @param code2: An enum value of RETURN_CODES

    @returns: the more severe one between code1 and code2.

    """
    if SEVERITY[code1] < SEVERITY[code2]:
        return code2
    return code1

Simran Basi

22aa9fe

2012-12-07 16:37:09 -0800

[diff] [blame]

116

Chris Masone

dfa0beba

2012-03-19 11:41:47 -0700

[diff] [blame]

117

Allen Li

2016-09-14 14:44:59 -0700

[diff] [blame]

118

def bool_str(x):
    """Argparse `type=` converter for explicit boolean strings.

    Only the exact spellings 'True' and 'False' are accepted.

    @param x: string representation of boolean value.

    @returns: True for 'True', False for 'False'.

    @raises argparse.ArgumentTypeError: if x is any other value.
    """
    for literal, value in (('True', True), ('False', False)):
        if x == literal:
            return value
    raise argparse.ArgumentTypeError(
            '%s is not one of True or False' % (x,))

131

132

Allen Li

603728a

2016-12-08 13:58:11 -0800

[diff] [blame]

133

def _get_priority_value(x):

134

"""Convert a priority representation to its int value.

135

136

Priorities can be described either by an int value (possibly as a string)

137

or a name string. This function coerces both forms to an int value.

138

139

This function is intended for casting command line arguments during

140

parsing.

141

142

@param x: priority value as an int, int string, or name string

143

144

@returns: int value of priority

"""

try:

return int(x)

except ValueError:

try:

return priorities.Priority.get_value(x)

151

except AttributeError:

152

raise argparse.ArgumentTypeError(

153

'Unknown priority level %s. Try one of %s.'

154

% (x, ', '.join(priorities.Priority.names)))

155

156

Allen Li

2016-09-14 14:44:59 -0700

[diff] [blame]

157

def make_parser():
    """Make ArgumentParser instance for run_suite.py.

    Several options that are logically boolean flags (--no_wait,
    --file_bugs, --retry, --offload_failures_only) take an explicit
    "True"/"False" value parsed by bool_str instead of being store_true
    flags; see the comment on --no_wait for why.

    @returns: a configured argparse.ArgumentParser.
    """
    parser = argparse.ArgumentParser(
            usage="%(prog)s [options]")
    parser.add_argument("-b", "--board", dest="board")
    parser.add_argument("-i", "--build", dest="build")
    parser.add_argument(
        "-w", "--web", dest="web", default=None,
        help="Address of a webserver to receive suite requests.")
    parser.add_argument(
        '--firmware_rw_build', dest='firmware_rw_build', default=None,
        help='Firmware build to be installed in dut RW firmware.')
    parser.add_argument(
        '--firmware_ro_build', dest='firmware_ro_build', default=None,
        help='Firmware build to be installed in dut RO firmware.')
    parser.add_argument(
        '--test_source_build', dest='test_source_build', default=None,
        help=('Build that contains the test code, '
              'e.g., it can be the value of `--build`, '
              '`--firmware_rw_build` or `--firmware_ro_build` '
              'arguments. Default is None, that is, use the test '
              'code from `--build` (CrOS image)'))
    # This should just be a boolean flag, but the autotest "proxy" code
    # can't handle flags that don't take arguments.
    parser.add_argument(
        "-n", "--no_wait", dest="no_wait", default=False, type=bool_str,
        help='Must pass "True" or "False" if used.')
    # If you really want no pool, --pool="" will do it. USE WITH CARE.
    parser.add_argument("-p", "--pool", dest="pool", default="suites")
    parser.add_argument("-s", "--suite_name", dest="name")
    parser.add_argument("-a", "--afe_timeout_mins", type=int,
                        dest="afe_timeout_mins", default=30)
    parser.add_argument("-t", "--timeout_mins", type=int,
                        dest="timeout_mins", default=1440)
    parser.add_argument("-x", "--max_runtime_mins", type=int,
                        dest="max_runtime_mins", default=1440)
    parser.add_argument("-d", "--delay_sec", type=int,
                        dest="delay_sec", default=10)
    parser.add_argument("-m", "--mock_job_id", dest="mock_job_id",
                        help="Attach to existing job id for already running "
                        "suite, and creates report.")
    # NOTE(akeshet): This looks similar to --no_wait, but behaves differently.
    # --no_wait is passed in to the suite rpc itself and affects the suite,
    # while this does not.
    parser.add_argument("-c", "--create_and_return", dest="create_and_return",
                        action="store_true",
                        help="Create the suite and print the job id, then "
                        "finish immediately.")
    parser.add_argument("-u", "--num", dest="num", type=int, default=None,
                        help="Run on at most NUM machines.")
    # Same boolean flag issue applies here.
    parser.add_argument(
        "-f", "--file_bugs", dest="file_bugs", default=False, type=bool_str,
        help=('File bugs on test failures. Must pass "True" or '
              '"False" if used.'))
    parser.add_argument("-l", "--bypass_labstatus", dest="bypass_labstatus",
                        action="store_true", help='Bypass lab status check.')
    # We allow either a number or a string for the priority.  This way, if you
    # know what you're doing, one can specify a custom priority level between
    # other levels.
    parser.add_argument("-r", "--priority", dest="priority",
                        type=_get_priority_value,
                        default=priorities.Priority.DEFAULT,
                        action="store",
                        help="Priority of suite. Either numerical value, or "
                        "one of (" + ", ".join(priorities.Priority.names)
                        + ").")
    parser.add_argument(
        '--retry', dest='retry', default=False, type=bool_str, action='store',
        help='Enable test retry. Must pass "True" or "False" if used.')
    # The two help fragments are joined by implicit literal concatenation,
    # so the separating space has to live inside one of them.
    parser.add_argument('--max_retries', dest='max_retries', default=None,
                        type=int, action='store', help='Maximum retries '
                        'allowed at suite level. No limit if not specified.')
    parser.add_argument('--minimum_duts', dest='minimum_duts', type=int,
                        default=0, action='store',
                        help='Check that the pool has at least such many '
                        'healthy machines, otherwise suite will not run. '
                        'Default to 0.')
    parser.add_argument('--suite_min_duts', dest='suite_min_duts', type=int,
                        default=0, action='store',
                        help='Preferred minimum number of machines. Scheduler '
                        'will prioritize on getting such many machines for '
                        'the suite when it is competing with another suite '
                        'that has a higher priority but already got minimum '
                        'machines it needs. Default to 0.')
    parser.add_argument("--suite_args", dest="suite_args",
                        default=None, action="store",
                        help="Argument string for suite control file.")
    parser.add_argument('--offload_failures_only',
                        dest='offload_failures_only', type=bool_str,
                        action='store', default=False,
                        help='Only enable gs_offloading for failed tests. '
                        'Successful tests will be deleted. Must pass "True"'
                        ' or "False" if used.')
    parser.add_argument('--use_suite_attr', dest='use_suite_attr',
                        action='store_true', default=False,
                        help='Advanced. Run the suite based on ATTRIBUTES of '
                        'control files, rather than SUITE.')
    parser.add_argument('--json_dump', dest='json_dump', action='store_true',
                        default=False,
                        help='Dump the output of run_suite to stdout.')
    parser.add_argument(
        '--run_prod_code', dest='run_prod_code',
        action='store_true', default=False,
        help='Run the test code that lives in prod aka the test '
        'code currently on the lab servers.')
    parser.add_argument(
        '--delay_minutes', type=int, default=0,
        help=('Delay the creation of test jobs for a given '
              'number of minutes. This argument can be used to '
              'force provision jobs being delayed, which helps '
              'to distribute loads across devservers.'))
    parser.add_argument(
        '--skip_duts_check', dest='skip_duts_check', action='store_true',
        default=False, help='If True, skip minimum available DUTs check')
    # --job_keyvals and --test_args are parsed as Python literals.
    parser.add_argument(
        '--job_keyvals', dest='job_keyvals', type=ast.literal_eval,
        action='store', default=None,
        help='A dict of job keyvals to be inject to suite control file')
    parser.add_argument(
        '--test_args', dest='test_args', type=ast.literal_eval,
        action='store', default=None,
        help=('A dict of args passed all the way to each individual test that '
              'will be actually ran.'))
    parser.add_argument(
        '--require_logfile', action='store_true',
        help=('Stream logs of run_suite.py to a local file named '
              'run_suite-<build name>.log.'))

    # Used for monitoring purposes, to measure no-op swarming proxy latency.
    parser.add_argument('--do_nothing', action='store_true',
                        help=argparse.SUPPRESS)

    # Used when lab/job status checking is needed. Currently its only user is
    # suite scheduler v2.
    parser.add_argument(
        '--pre_check', action='store_true',
        help=('Check lab and job status before kicking off a suite. Used by '
              'suite scheduler v2.'))

    return parser

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

298

299

Allen Li

2017-07-10 14:58:16 -0700

[diff] [blame]

300

def verify_and_clean_options(options):
    """Verify the validity of options.

    Each failed check prints an explanation to stdout and returns False.
    As a side effect on success, options.test_source_build is defaulted to
    options.build when it was not given.

    @param options: The parsed options to verify.

    @returns: True if verification passes, False otherwise.

    """
    if options.mock_job_id and (
            not options.build or not options.name or not options.board):
        print('When using -m, need to specify build, board and suite '
              'name which you have used for creating the original job')
        return False
    if not options.build:
        print('Need to specify which build to use')
        return False
    if not options.board:
        print('Need to specify board')
        return False
    if not options.name:
        print('Need to specify suite name')
        return False
    if options.num is not None and options.num < 1:
        print('Number of machines must be more than 0, if specified.')
        return False
    if not options.retry and options.max_retries is not None:
        print('max_retries can only be used with --retry=True')
        return False
    if options.use_suite_attr and options.suite_args is not None:
        print('The new suite control file cannot parse the suite_args: %s. '
              'Please do not specify any suite_args here.'
              % options.suite_args)
        return False
    if options.no_wait and options.retry:
        # Reported as a warning only; this combination does not fail
        # verification.
        print('Test retry is not available when using --no_wait=True')
    # Default to use the test code in CrOS build.
    if not options.test_source_build and options.build:
        options.test_source_build = options.build
    return True

Shuqian Zhao

2015-06-02 11:12:28 -0700

[diff] [blame]

341

def change_options_for_suite_attr(options):
    """Change options to be prepared to run the suite_attr_wrapper.

    If specify 'use_suite_attr' from the cmd line, it indicates to run the
    new style suite control file, suite_attr_wrapper. Then, change the
    options.name to 'suite_attr_wrapper', change the options.suite_args to
    include the arguments needed by suite_attr_wrapper.

    The options object is modified in place and also returned.

    @param options: The verified options. options.name is either a single
                    suite name string or an iterable of suite names.

    @returns: The changed options.

    """
    # Convert the suite_name to attribute boolean expression. isinstance is
    # used so that str subclasses are treated as one name rather than being
    # iterated character by character.
    if isinstance(options.name, str):
        attr_filter_val = 'suite:%s' % options.name
    else:
        attr_filter_val = ' or '.join(['suite:%s' % x for x in options.name])

    # Replace suite_args with the string form of a dict of arguments for
    # suite_attr_wrapper. Any previous suite_args value is overwritten.
    args_dict = {'attr_filter': attr_filter_val}
    options.suite_args = str(args_dict)
    options.name = 'suite_attr_wrapper'

    return options

Allen Li

2016-09-02 11:52:34 -0700

[diff] [blame]

370

class TestResult(object):

    """Outcome of a single TestView, in a form that is easy to log."""

    # Statuses that get their own banner; every other status is shown as
    # a failure.
    _PRETTY_STATUS_MAP = {
        'GOOD': '[ PASSED ]',
        'TEST_NA': '[ INFO ]',
    }

    def __init__(self, test_view, retry_count=0):
        """Initialize instance.

        @param test_view: TestView instance; its test name, 'status' and
                          'reason' are copied onto this object.
        @param retry_count: Retry count for test. Optional.
        """
        self.name = test_view.get_testname()
        self.status = test_view['status']
        self.reason = test_view['reason']
        self.retry_count = retry_count


    @property
    def _pretty_status(self):
        """Banner string for this result's status."""
        fallback = '[ FAILED ]'
        return self._PRETTY_STATUS_MAP.get(self.status, fallback)


    def log_using(self, log_function, name_column_width):
        """Emit this result through the given log function.

        One line is always logged with the name and status banner. A line
        with the raw status and reason follows for anything other than
        'GOOD', and a retry-count line follows if the test was retried.

        @param log_function: Log function to use. Example: logging.info
        @param name_column_width: Width of name column for formatting.
        """
        label = self.name.ljust(name_column_width)
        entries = [('%s%s', label, self._pretty_status)]
        if self.status != 'GOOD':
            entries.append(('%s %s: %s', label, self.status, self.reason))
        if self.retry_count > 0:
            entries.append(('%s retry_count: %s', label, self.retry_count))
        for entry in entries:
            log_function(*entry)

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

407

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

408

Shuqian Zhao

f39bf2a

2015-09-29 14:19:28 -0700

[diff] [blame]

409

def get_original_suite_name(suite_name, suite_args):
    """Recover the real suite name when suite_attr_wrapper was launched.

    For any other suite the name is returned unchanged and suite_args is
    not inspected.

    @param suite_name: the name of the suite launched in afe. When it is
                       suite_attr_wrapper, the suite that actually running is
                       specified in the suite_args.
    @param suite_args: string form of a dict whose 'attr_filter' entry
                       carries the original suite name as 'suite:<name>'.

    @returns: the first suite named in the attr_filter, or suite_name if
              none is found.

    """
    if suite_name != 'suite_attr_wrapper':
        return suite_name

    attr_filter = ast.literal_eval(suite_args).get('attr_filter', '')
    # The filter is a boolean expression; split on parentheses and spaces
    # and take the first 'suite:<name>' term.
    for token in re.split('[() ]', attr_filter):
        if token.startswith('suite:'):
            return token[6:]
    return suite_name

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

428

class LogLink(object):

J. Richard Barnette

2013-08-23 11:24:21 -0700

[diff] [blame]

429

"""Information needed to record a link in the logs.

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

430

J. Richard Barnette

2013-08-23 11:24:21 -0700

[diff] [blame]

431

Depending on context and the information provided at

432

construction time, the link may point to either to log files for

433

a job, or to a bug filed for a failure in the job.

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

434

J. Richard Barnette

2013-08-23 11:24:21 -0700

[diff] [blame]

435

@var anchor The link text.

436

@var url The link url.

437

@var bug_id Id of a bug to link to, or None.

438

"""

439

Kevin Cheng

2bdd372

2016-03-24 21:30:52 -0700

[diff] [blame]

440

# A list of tests that don't get retried so skip the dashboard.

441

_SKIP_RETRY_DASHBOARD = ['provision']

442

Ningning Xia

2016-04-19 14:06:03 -0700

[diff] [blame]

443

_BUG_LINK_PREFIX = 'Auto-Bug'

444

_LOG_LINK_PREFIX = 'Test-Logs'

445

J. Richard Barnette

2013-08-23 11:24:21 -0700

[diff] [blame]

446

Prashanth Balasubramanian

ae43721

2014-10-27 11:17:26 -0700

[diff] [blame]

447

@classmethod

448

def get_bug_link(cls, bug_id):

449

"""Generate a bug link for the given bug_id.

450

451

@param bug_id: The id of the bug.

452

@return: A link, eg: https://crbug.com/<bug_id>.

453

"""

Allen Li

2016-09-14 19:05:47 -0700

[diff] [blame]

454

return reporting_utils.link_crbug(bug_id)

Prashanth Balasubramanian

ae43721

2014-10-27 11:17:26 -0700

[diff] [blame]

455

456

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

457

def __init__(self, anchor, server, job_string, bug_info=None, reason=None,

Simran Basi

2015-02-03 15:50:18 -0800

[diff] [blame]

458

retry_count=0, testname=None):

J. Richard Barnette

2013-08-23 11:24:21 -0700

[diff] [blame]

459

"""Initialize the LogLink by generating the log URL.

460

461

@param anchor The link text.

Alex Miller

c7a5952

2013-10-30 15:18:57 -0700

[diff] [blame]

462

@param server The hostname of the server this suite ran on.

J. Richard Barnette

2013-08-23 11:24:21 -0700

[diff] [blame]

463

@param job_string The job whose logs we'd like to link to.

464

@param bug_info Info about the bug, if one was filed.

Fang Deng

53c6ff5

2014-02-24 17:51:24 -0800

[diff] [blame]

465

@param reason A string representing the reason of failure if any.

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

466

@param retry_count How many times the test has been retried.

Simran Basi

2015-02-03 15:50:18 -0800

[diff] [blame]

467

@param testname Optional Arg that supplies the testname.

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

468

"""

469

self.anchor = anchor

Allen Li

2016-09-14 19:05:47 -0700

[diff] [blame]

470

self.url = _URL_PATTERN % (server, job_string)

Fang Deng

53c6ff5

2014-02-24 17:51:24 -0800

[diff] [blame]

471

self.reason = reason

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

472

self.retry_count = retry_count

Simran Basi

2015-02-03 15:50:18 -0800

[diff] [blame]

473

self.testname = testname

J. Richard Barnette

2013-08-23 11:24:21 -0700

[diff] [blame]

474

if bug_info:

475

self.bug_id, self.bug_count = bug_info

476

else:

477

self.bug_id = None

478

self.bug_count = None

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

479

480

Allen Li

2016-09-14 15:19:20 -0700

[diff] [blame]

481

@property

482

def bug_url(self):

483

"""URL of associated bug."""

484

if self.bug_id:

485

return reporting_utils.link_crbug(self.bug_id)

else:

return None

@property

def _bug_count_text(self):

492

"""Return bug count as human friendly text."""

493

if self.bug_count is None:

494

bug_info = 'unknown number of reports'

495

elif self.bug_count == 1:

496

bug_info = 'new report'

497

else:

498

bug_info = '%s reports' % self.bug_count

return bug_info

Ningning Xia

2016-04-19 14:06:03 -0700

[diff] [blame]

502

def GenerateBuildbotLinks(self):

J. Richard Barnette

2013-08-23 11:24:21 -0700

[diff] [blame]

503

"""Generate a link formatted to meet buildbot expectations.

504

Ningning Xia

2016-04-19 14:06:03 -0700

[diff] [blame]

505

If there is a bug associated with this link, report a link to the bug

506

and a link to the job logs;

J. Richard Barnette

2013-08-23 11:24:21 -0700

[diff] [blame]

507

otherwise report a link to the job logs.

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

508

Ningning Xia

2016-04-19 14:06:03 -0700

[diff] [blame]

509

@return A list of links formatted for the buildbot log annotator.

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

510

"""

Ningning Xia

2016-04-19 14:06:03 -0700

[diff] [blame]

511

bug_info_strings = []

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

512

info_strings = []

Ningning Xia

2016-04-19 14:06:03 -0700

[diff] [blame]

513

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

514

if self.retry_count > 0:

515

info_strings.append('retry_count: %d' % self.retry_count)

Ningning Xia

2016-04-19 14:06:03 -0700

[diff] [blame]

516

bug_info_strings.append('retry_count: %d' % self.retry_count)

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

517

Fang Deng

53c6ff5

2014-02-24 17:51:24 -0800

[diff] [blame]

518

if self.reason:

Allen Li

2016-09-14 15:19:20 -0700

[diff] [blame]

519

bug_info_strings.append(self.reason)

520

info_strings.append(self.reason)

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

521

Allen Li

2016-09-14 15:19:20 -0700

[diff] [blame]

522

# Add the bug link to buildbot_links

523

if self.bug_url:

524

bug_info_strings.append(self._bug_count_text)

Ningning Xia

2016-04-19 14:06:03 -0700

[diff] [blame]

525

Allen Li

2016-09-14 15:19:20 -0700

[diff] [blame]

526

bug_anchor_text = self._format_anchor_text(self._BUG_LINK_PREFIX,

527

bug_info_strings)

528

529

yield annotations.StepLink(bug_anchor_text, self.bug_url)

530

531

anchor_text = self._format_anchor_text(self._LOG_LINK_PREFIX,

532

info_strings)

533

yield annotations.StepLink(anchor_text, self.url)

Ningning Xia

2016-04-19 14:06:03 -0700

[diff] [blame]

534

535

Allen Li

2016-09-14 15:19:20 -0700

[diff] [blame]

536

def _format_anchor_text(self, prefix, info_strings):

537

"""Format anchor text given a prefix and info strings.

Ningning Xia

2016-04-19 14:06:03 -0700

[diff] [blame]

538

539

@param prefix The prefix of the anchor text.

540

@param info_strings The infos presented in the anchor text.

541

@return A anchor_text with the right prefix and info strings.

542

"""

Allen Li

2016-09-14 15:19:20 -0700

[diff] [blame]

543

anchor_text = '[{prefix}]: {anchor}'.format(

544

prefix=prefix,

545

anchor=self.anchor.strip())

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

546

if info_strings:

Allen Li

2016-09-14 15:19:20 -0700

[diff] [blame]

547

info_text = ', '.join(info_strings)

548

anchor_text += ': ' + info_text

Ningning Xia

2016-04-19 14:06:03 -0700

[diff] [blame]

549

return anchor_text

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

550

Allen Li

2016-09-14 15:19:20 -0700

[diff] [blame]

551

@property

552

def text_link(self):

553

"""Link to the job's logs, for consumption by a human.

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

554

Craig Harrison

d845157

2012-08-31 10:29:33 -0700

[diff] [blame]

555

@return A link formatted for human readability.

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

556

"""

Aviv Keshet

269848b

2016-10-03 00:13:19 -0700

[diff] [blame]

557

return '%s %s' % (self.anchor, self.url)

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

558

Simran Basi

2015-02-03 15:50:18 -0800

[diff] [blame]

559

def GenerateWmatrixRetryLink(self):
    """Generate a link to the wmatrix retry dashboard.

    @return An annotations.StepLink labelled '[Flake-Dashboard]' for this
            test, or None when the link has no test name or the test name
            is listed in _SKIP_RETRY_DASHBOARD.
    """
    name = self.testname
    if name and name not in self._SKIP_RETRY_DASHBOARD:
        return annotations.StepLink(
                text='[Flake-Dashboard]: %s' % name,
                url=reporting_utils.link_retry_url(name))
    return None

Simran Basi

2015-02-03 15:50:18 -0800

[diff] [blame]

569

David Riley

a0cd1c2

2017-07-10 11:15:57 -0700

[diff] [blame]

570

def GenerateWmatrixHistoryLink(self):
    """Generate a link to the wmatrix test history dashboard.

    @return An annotations.StepLink labelled '[Test-History]' for this
            test, or None when the link has no test name or the test name
            is listed in _SKIP_RETRY_DASHBOARD.
    """
    name = self.testname
    if name and name not in self._SKIP_RETRY_DASHBOARD:
        return annotations.StepLink(
                text='[Test-History]: %s' % name,
                url=reporting_utils.link_test_history(name))
    return None

580

Simran Basi

2015-02-03 15:50:18 -0800

[diff] [blame]

581

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

582

class Timings(object):
    """Timestamps of the important milestones of a suite run.

    All timestamps are datetime.datetime objects (or None until recorded).

    @var suite_job_id: the afe job id of the suite job whose timings are
                       being recorded.
    @var download_start_time: the time the devserver starts staging
                              the build artifacts. Recorded in
                              create_suite_job.
    @var payload_end_time: the time when the artifacts needed to start
                           installing images onto DUTs are staged.
                           Recorded in create_suite_job.
    @var artifact_end_time: the remaining artifacts are downloaded after
                            the reimaging job is kicked off, at which point
                            artifact_end_time is recorded. Recorded in
                            dynamic_suite.py.
    @var suite_start_time: the time the suite started.
    @var tests_start_time: the time the first test started running.
    @var tests_end_time: the time the last test finished running.
    """

    def __init__(self, suite_job_id):
        """Create an empty record for the given suite job.

        @param suite_job_id: the afe job id of the suite job.
        """
        self.suite_job_id = suite_job_id

        # Milestones of staging artifacts on the devserver.
        self.download_start_time = None
        self.payload_end_time = None
        self.artifact_end_time = None

        # Start time taken from the view that corresponds to the suite
        # itself rather than to an individual test.
        self.suite_start_time = None

        # Earliest start and latest end over all TestViews recorded so far.
        self.tests_start_time = None
        self.tests_end_time = None


    def RecordTiming(self, view):
        """Extract the pertinent time info from a test view and record it.

        Any entry returned by get_detailed_test_views() can be passed in;
        whatever timestamp info it carries is looked up and stored.

        When a view has no started/finished timestamp, datetime.min and
        datetime.max respectively are used in its place.

        @param view: A TestView object.
        """
        started = view['test_started_time']
        start_candidate = (time_utils.time_string_to_datetime(started)
                           if started else datetime.min)
        finished = view['test_finished_time']
        end_candidate = (time_utils.time_string_to_datetime(finished)
                         if finished else datetime.max)

        if view.get_testname() == TestView.SUITE_JOB:
            self.suite_start_time = start_candidate
        else:
            self._UpdateFirstTestStartTime(start_candidate)
            self._UpdateLastTestEndTime(end_candidate)

        # The staging timestamps live in the keyvals of the suite job only.
        if view['afe_job_id'] != self.suite_job_id:
            return
        if 'job_keyvals' not in view:
            return

        keyvals = view['job_keyvals']
        to_datetime = time_utils.time_string_to_datetime
        self.download_start_time = to_datetime(
                keyvals.get(constants.DOWNLOAD_STARTED_TIME),
                handle_type_error=True)
        self.payload_end_time = to_datetime(
                keyvals.get(constants.PAYLOAD_FINISHED_TIME),
                handle_type_error=True)
        self.artifact_end_time = to_datetime(
                keyvals.get(constants.ARTIFACT_FINISHED_TIME),
                handle_type_error=True)


    def _UpdateFirstTestStartTime(self, candidate):
        """Keep |candidate| as tests_start_time iff it is an earlier time.

        @param candidate: a datetime.datetime object.
        """
        earliest = self.tests_start_time
        if not earliest or candidate < earliest:
            self.tests_start_time = candidate


    def _UpdateLastTestEndTime(self, candidate):
        """Keep |candidate| as tests_end_time iff it is a later time.

        @param candidate: a datetime.datetime object.
        """
        latest = self.tests_end_time
        if not latest or candidate > latest:
            self.tests_end_time = candidate


    def __str__(self):
        """Render all recorded timings as a multi-line summary."""
        template = ('\n'
                    'Suite timings:\n'
                    'Downloads started at %s\n'
                    'Payload downloads ended at %s\n'
                    'Suite started at %s\n'
                    'Artifact downloads ended (at latest) at %s\n'
                    'Testing started at %s\n'
                    'Testing ended at %s\n')
        milestones = (self.download_start_time,
                      self.payload_end_time,
                      self.suite_start_time,
                      self.artifact_end_time,
                      self.tests_start_time,
                      self.tests_end_time)
        return template % milestones

691

692

Alex Miller

c7a5952

2013-10-30 15:18:57 -0700

[diff] [blame]

693

def instance_for_pool(pool_name):
    """
    Return the hostname of the server that should be used to service a suite
    for the specified pool.

    The value is read from CONFIG under 'POOL_INSTANCE_SHARDING', with
    _DEFAULT_AUTOTEST_INSTANCE passed as the default.

    @param pool_name: The pool (without the 'pool:' prefix) to schedule the
                      suite against.
    @return: The correct host that should be used to service this suite run.
    """
    section = 'POOL_INSTANCE_SHARDING'
    return CONFIG.get_config_value(
            section, pool_name, default=_DEFAULT_AUTOTEST_INSTANCE)

704

705

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

706

class TestView(object):
    """Represents a test view and provides a set of helper functions."""


    SUITE_JOB = 'Suite job'
    INFRA_TESTS = ['provision']
    # Prefixes of the pseudo test names that stand for a job itself rather
    # than for an actual test.
    _JOB_PREFIXES = ('SERVER_JOB', 'CLIENT_JOB')


    def __init__(self, view, afe_job, suite_name, build, user,
                 solo_test_run=False):
        """Init a TestView object representing a tko test view.

        @param view: A dictionary representing a tko test view.
        @param afe_job: An instance of frontend.afe.models.Job
                        representing the job that kicked off the test.
        @param suite_name: The name of the suite
                           that the test belongs to.
        @param build: The build for which the test is run.
        @param user: The user for which the test is run.
        @param solo_test_run: This is a solo test run not part of a suite.
        """
        self.view = view
        self.afe_job = afe_job
        self.suite_name = suite_name
        self.build = build
        # A job without a parent is the suite job itself, unless the run
        # is a solo test that never had a suite.
        self.is_suite_view = afe_job.parent_job is None and not solo_test_run
        # This is the test name that will be shown in the output.
        # It is computed lazily and cached by get_testname().
        self.testname = None
        self.user = user

        # The case that a job was aborted before it got a chance to run
        # usually indicates suite has timed out (unless aborted by user).
        # In this case, the abort reason will be None.
        # Update the reason with proper information.
        if (self.is_relevant_suite_view() and
                self.get_testname() != self.SUITE_JOB and
                self.view['status'] == 'ABORT' and
                not self.view['reason']):
            self.view['reason'] = 'Timed out, did not run.'


    def __getitem__(self, key):
        """Overload __getitem__ so that we can still use []

        @param key: A key of the tko test view.

        @returns: The value of an attribute in the view.

        """
        return self.view[key]


    def __iter__(self):
        """Overload __iter__ so that it supports 'in' operator."""
        return iter(self.view)


    def _job_qualified_test_name(self):
        """Return the view's test name, qualified by job name if needed.

        SERVER_JOB and CLIENT_JOB entries exist for every job, so the job
        name is put in front of them ('<job_name>_<test_name>') to tell
        them apart. Any other test name is returned unchanged.

        @returns: The (possibly job-qualified) raw test name.

        """
        test_name = self.view['test_name']
        if test_name.startswith(self._JOB_PREFIXES):
            return '%s_%s' % (self.view['job_name'], test_name)
        return test_name


    def get_testname(self):
        """Get test name that should be shown in the output.

        Formalize the test_name we got from the test view.

        Remove 'build/suite' prefix if any.

        If one runs a test in control file via the following code,
            job.runtest('my_Test', tag='tag')
        for most of the cases, view['test_name'] would look like 'my_Test.tag'.
        If this is the case, this method will just return the original
        test name, i.e. 'my_Test.tag'.

        There are four special cases.
        1) A test view is for the suite job's SERVER_JOB.
           In this case, this method will return 'Suite job'.

        2) A test view is of a child job or a solo test run not part of a
           suite, and for a SERVER_JOB or CLIENT_JOB.
           In this case, we will take the job name, remove the build/suite
           prefix from the job name, and append 'SERVER_JOB' or
           'CLIENT_JOB' to the rest. So the names returned by this
           method will look like:
             'Telemetry Smoothness Measurement_SERVER_JOB'
             'dummy_Pass_SERVER_JOB'
             'dummy_Fail_SERVER_JOB'

        3) A test view is of a suite job and its status is ABORT.
           In this case, the view['test_name'] is the child job's name.
           For instance,
             'lumpy-release/R35-5712.0.0/perf_v2/
                   Telemetry Smoothness Measurement'
             'lumpy-release/R35-5712.0.0/dummy/dummy_Pass'
             'lumpy-release/R35-5712.0.0/dummy/dummy_Fail'
           The above names will be converted to the following:
             'Telemetry Smoothness Measurement'
             'dummy_Pass'
             'dummy_Fail'

        4) A test view is of a suite job and its status is TEST_NA.
           In this case, the view['test_name'] is the NAME field of the control
           file. For instance,
             'Telemetry Smoothness Measurement'
             'dummy_Pass'
             'dummy_Fail'
           This method will not modify these names.

        @returns: Test name after normalization.

        """
        if self.testname is not None:
            return self.testname

        if (self.is_suite_view and
                self.view['test_name'].startswith('SERVER_JOB')):
            # Rename suite job's SERVER_JOB to 'Suite job'.
            self.testname = self.SUITE_JOB
            return self.testname

        # Remove the build and suite name from testname if any.
        self.testname = tools.get_test_name(
                self.build, self.suite_name, self._job_qualified_test_name())
        return self.testname


    def is_relevant_suite_view(self):
        """Checks whether this is a suite view we should care about.

        A suite view is relevant if it is the suite job itself, or if it
        belongs to the suite job, is not a CLIENT_JOB entry and has no
        subdir.

        @returns: True if it is relevant. False otherwise.
        """
        return (self.get_testname() == self.SUITE_JOB or
                (self.is_suite_view and
                 not self.view['test_name'].startswith('CLIENT_JOB') and
                 not self.view['subdir']))


    def is_test(self):
        """Return whether the view is for an actual test.

        @returns True if the view is for an actual test.
                 False if the view is for SERVER_JOB or CLIENT_JOB.

        """
        return not self.view['test_name'].startswith(self._JOB_PREFIXES)


    def is_retry(self):
        """Check whether the view is for a retry.

        @returns: True, if the view is for a retry; False otherwise.

        """
        return self.view['job_keyvals'].get('retry_original_job_id') is not None


    def hit_timeout(self):
        """Check whether the corresponding job has hit its own timeout.

        Note this method should not be called for those test views
        that belong to a suite job and are determined as irrelevant
        by is_relevant_suite_view. This is because they are associated
        with the suite job, whose job start/finished time makes no sense
        to an irrelevant test view.

        @returns: True if the corresponding afe job has hit timeout.
                  False otherwise.
        """
        if (self.is_relevant_suite_view() and
                self.get_testname() != self.SUITE_JOB):
            # Any relevant suite test view except SUITE_JOB
            # did not hit its own timeout because it was not ever run.
            return False
        started = self.view['job_started_time']
        start = (datetime.strptime(started, time_utils.TIME_FMT)
                 if started else None)
        finished = self.view['job_finished_time']
        end = (datetime.strptime(finished, time_utils.TIME_FMT)
               if finished else None)
        if not start or not end:
            # Without both timestamps the runtime cannot be measured.
            return False
        runtime_mins = (end - start).total_seconds() / 60.0
        return runtime_mins > self.afe_job.max_runtime_mins


    def is_aborted(self):
        """Check if the view was aborted.

        For suite job and child job test views, we check job keyval
        'aborted_by' and test status.

        For relevant suite job test views, we only check test status
        because the suite job keyval won't make sense to individual
        test views.

        @returns: True if the test was aborted, False otherwise.

        """
        if (self.is_relevant_suite_view() and
                self.get_testname() != self.SUITE_JOB):
            return self.view['status'] == 'ABORT'
        return (bool(self.view['job_keyvals'].get('aborted_by')) and
                self.view['status'] in ['ABORT', 'RUNNING'])


    def is_in_fail_status(self):
        """Check if the given test's status corresponds to a failure.

        @returns: True if the test's status is FAIL, ERROR or ABORT.
                  False otherwise.

        """
        # All the statuses tests can have when they fail.
        return self.view['status'] in ['FAIL', 'ERROR', 'ABORT']


    def is_infra_test(self):
        """Check whether this is a test that only lab infra is concerned.

        @returns: True if only lab infra is concerned, False otherwise.

        """
        return self.get_testname() in self.INFRA_TESTS


    def get_buildbot_link_reason(self):
        """Generate the buildbot link reason for the test.

        @returns: '<status>: <reason>' when the view has a reason,
                  otherwise just the status.

        """
        status, reason = self.view['status'], self.view['reason']
        return '%s: %s' % (status, reason) if reason else status


    def get_job_id_owner_str(self):
        """Generate the job_id_owner string for a test.

        @returns: A string which looks like 135036-username

        """
        return '%s-%s' % (self.view['afe_job_id'], self.user)


    def get_bug_info(self, suite_job_keyvals):
        """Get the bug info from suite_job_keyvals.

        If a bug has been filed for the test, its bug info (bug id and counts)
        will be stored in the suite job's keyvals. This method attempts to
        retrieve bug info of the test from |suite_job_keyvals|. It will return
        None if no bug info is found. No need to check bug info if the view is
        SUITE_JOB.

        @param suite_job_keyvals: The job keyval dictionary of the suite job.
                All the bug info about child jobs are stored in
                suite job's keyvals.

        @returns: None if there is no bug info, or a pair with the
                  id of the bug, and the count of the number of
                  times the bug has been seen.

        """
        if self.get_testname() == self.SUITE_JOB:
            return None
        testname = self._job_qualified_test_name()
        return tools.get_test_failure_bug_info(
                suite_job_keyvals, self.view['afe_job_id'],
                testname)


    def should_display_buildbot_link(self):
        """Check whether a buildbot link should show for this view.

        For suite job view, show buildbot link if it fails.
        For normal test view,
            show buildbot link if it is a retry
            show buildbot link if it hits its own timeout.
            show buildbot link if it fails. This doesn't
            include the case where it was aborted but has
            not hit its own timeout (most likely it was aborted because
            suite has timed out).

        @returns: True if we should show the buildbot link.
                  False otherwise.
        """
        is_bad_status = (self.view['status'] != 'GOOD' and
                         self.view['status'] != 'TEST_NA')
        if self.get_testname() == self.SUITE_JOB:
            return is_bad_status
        if self.is_retry():
            return True
        if is_bad_status:
            return not self.is_aborted() or self.hit_timeout()
        # A passing, non-retry test needs no link. Return False explicitly
        # instead of falling off the end and returning None.
        return False


    def get_control_file_attributes(self):
        """Get the attributes from the control file of the test.

        @returns: A list of test attribute or None.
        """
        control_file = self.afe_job.control_file
        if not control_file:
            return None
        cd = control_data.parse_control_string(control_file)
        return list(cd.attributes)


    def override_afe_job_id(self, afe_job_id):
        """Overrides the AFE job id for the test.

        @param afe_job_id: The new AFE job id to use.
        """
        self.view['afe_job_id'] = afe_job_id

1032

1033

Allen Li

2016-09-14 19:05:47 -0700

[diff] [blame]

1034

def log_buildbot_links(log_func, links):
    """Output buildbot links to log.

    For every link, all of its buildbot links are logged first, followed
    by its wmatrix retry link and its wmatrix history link whenever those
    are available.

    @param log_func: Logging function to use.
    @param links: Iterable of LogLink instances.
    """
    def _log_if_present(candidate):
        """Log |candidate| unless it is empty/None."""
        if candidate:
            log_func(candidate)

    for log_link in links:
        for buildbot_link in log_link.GenerateBuildbotLinks():
            log_func(buildbot_link)
        _log_if_present(log_link.GenerateWmatrixRetryLink())
        _log_if_present(log_link.GenerateWmatrixHistoryLink())

Allen Li

2016-09-14 19:05:47 -0700

[diff] [blame]

1049

1050

Allen Li

2017-07-05 13:38:04 -0700

[diff] [blame]

1051

class _ReturnCodeComputer(object):
    """Callable that computes run_suite's return code from test views."""

    def __init__(self, ignore_test_results=False):
        """Initialize instance.

        If ignore_test_results is True, don't check the test jobs for
        failure.
        """
        self._ignore_test_results = ignore_test_results


    def __call__(self, test_views):
        """Compute the exit code based on test results.

        @param test_views: Iterable of TestView objects.

        @returns: A pair of the worst return code found over all the
                  views and the message explaining it.
        """
        code = RETURN_CODES.OK
        tests_passed_after_retry = False

        for v in test_views:
            # The order of checking each case is important.
            if v.get_testname() == TestView.SUITE_JOB:
                if v.is_aborted() and v.hit_timeout():
                    current_code = RETURN_CODES.SUITE_TIMEOUT
                elif v.is_in_fail_status():
                    current_code = RETURN_CODES.INFRA_FAILURE
                elif v['status'] == 'WARN':
                    current_code = RETURN_CODES.WARNING
                else:
                    current_code = RETURN_CODES.OK
            elif self._ignore_test_results:
                # Test job results are deliberately not inspected, so this
                # view contributes nothing. Skip it so that the code below
                # never reads a current_code that is unbound (first view)
                # or left over from a previous view.
                continue
            else:
                if v.is_aborted() and v.is_relevant_suite_view():
                    # The test was aborted before started
                    # This guarantees that the suite has timed out.
                    current_code = RETURN_CODES.SUITE_TIMEOUT
                elif v.is_aborted() and not v.hit_timeout():
                    # The test was aborted, but
                    # not due to a timeout. This is most likely
                    # because the suite has timed out, but may
                    # also because it was aborted by the user.
                    # Since suite timing out is determined by checking
                    # the suite job view, we simply ignore this view here.
                    current_code = RETURN_CODES.OK
                elif v.is_in_fail_status():
                    # The test job failed.
                    if v.is_infra_test():
                        current_code = RETURN_CODES.INFRA_FAILURE
                    else:
                        current_code = RETURN_CODES.ERROR
                elif v['status'] == 'WARN':
                    # The test/suite job raised a warning.
                    current_code = RETURN_CODES.WARNING
                elif v.is_retry():
                    # The test is a passing retry.
                    current_code = RETURN_CODES.WARNING
                    tests_passed_after_retry = True
                else:
                    current_code = RETURN_CODES.OK
            code = get_worse_code(code, current_code)

        return code, _get_return_msg(code, tests_passed_after_retry)

1110

1111

1112

def _get_return_msg(code, tests_passed_after_retry):
    """Return the proper message for a given return code.

    @param code: An enum value of RETURN_CODES
    @param tests_passed_after_retry: True/False, indicating
        whether there are test(s) that have passed after retry.
        Only consulted when code is RETURN_CODES.WARNING.

    @returns: A string, representing the message. It is empty for any
              code that has no dedicated message.

    """
    if code == RETURN_CODES.INFRA_FAILURE:
        return 'Suite job failed or provisioning failed.'
    if code == RETURN_CODES.SUITE_TIMEOUT:
        return ('Some test(s) was aborted before running,'
                ' suite must have timed out.')
    if code == RETURN_CODES.WARNING:
        return ('Some test(s) passed after retry.'
                if tests_passed_after_retry
                else 'Some test(s) raised a warning.')
    if code == RETURN_CODES.ERROR:
        return 'Some test(s) failed.'
    return ''

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1138

class ResultCollector(object):

Simran Basi

2015-10-14 19:05:00 -0700

[diff] [blame]

1139

"""Collect test results of a suite or a single test run.

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1140

1141

Once a suite job has finished, use this class to collect test results.

1142

`run` is the core method that is to be called first. Then the caller

1143

could retrieve information like return code, return message, is_aborted,

1144

and timings by accessing the collector's public attributes. And output

1145

the test results and links by calling the 'output_*' methods.

1146

1147

Here is a overview of what `run` method does.

1148

1149

1) Collect the suite job's results from tko_test_view_2.

1150

For the suite job, we only pull test views without a 'subdir'.

1151

A NULL subdir indicates that the test was _not_ executed. This could be

1152

that no child job was scheduled for this test or the child job got

1153

aborted before starts running.

1154

(Note 'SERVER_JOB'/'CLIENT_JOB' are handled specially)

1155

1156

2) Collect the child jobs' results from tko_test_view_2.

1157

For child jobs, we pull all the test views associated with them.

Allen Li

2016-09-14 19:05:47 -0700

[diff] [blame]

1158

(Note 'SERVER_JOB'/'CLIENT_JOB' are handled specially)

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1159

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

1160

3) Generate web and buildbot links.

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1161

4) Compute timings of the suite run.

1162

5) Compute the return code based on test results.

1163

1164

@var _instance_server: The hostname of the server that is used

1165

to service the suite.

1166

@var _afe: The afe rpc client.

1167

@var _tko: The tko rpc client.

1168

@var _build: The build for which the suite is run,

1169

e.g. 'lumpy-release/R35-5712.0.0'

MK Ryu

2014-10-21 11:58:09 -0700

[diff] [blame]

1170

@var _board: The target board for which the suite is run,

1171

e.g., 'lumpy', 'link'.

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1172

@var _suite_name: The suite name, e.g. 'bvt', 'dummy'.

1173

@var _suite_job_id: The job id of the suite for which we are going to

1174

collect results.

Shuqian Zhao

f39bf2a

2015-09-29 14:19:28 -0700

[diff] [blame]

1175

@var _original_suite_name: The suite name we record timing would be

1176

different from _suite_name when running

1177

suite_attr_wrapper.

Allen Li

2017-07-05 13:38:04 -0700

[diff] [blame]

1178

@var _return_code_function: Called to return what the overall result of

1179

the suite is.

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

1180

@var _suite_views: A list of TestView objects, representing relevant

1181

test views of the suite job.

1182

@var _child_views: A list of TestView objects, representing test views

1183

of the child jobs.

1184

@var _test_views: A list of TestView objects, representing all test views

1185

from _suite_views and _child_views.

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1186

@var _web_links: A list of web links pointing to the results of jobs.

1187

@var _buildbot_links: A list of buildbot links for non-passing tests.

Simran Basi

2015-10-14 19:05:00 -0700

[diff] [blame]

1188

@var _solo_test_run: True if this is a single test run.

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1189

@var return_code: The exit code that should be returned by run_suite.

1190

@var return_message: Any message that should be displayed to explain

1191

the return code.

1192

@var is_aborted: Whether the suite was aborted or not.

1193

True, False or None (aborting status is unknown yet)

1194

@var timings: A Timing object that records the suite's timings.

"""

MK Ryu

2014-10-21 11:58:09 -0700

[diff] [blame]

1199

def __init__(self, instance_server, afe, tko, build, board,

Allen Li

2017-07-05 13:38:04 -0700

[diff] [blame]

1200

suite_name, suite_job_id,

1201

return_code_function,

1202

original_suite_name=None,

Simran Basi

2015-10-14 19:05:00 -0700

[diff] [blame]

1203

user=None, solo_test_run=False):

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1204

self._instance_server = instance_server

1205

self._afe = afe

1206

self._tko = tko

1207

self._build = build

MK Ryu

2014-10-21 11:58:09 -0700

[diff] [blame]

1208

self._board = board

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1209

self._suite_name = suite_name

1210

self._suite_job_id = suite_job_id

Shuqian Zhao

f39bf2a

2015-09-29 14:19:28 -0700

[diff] [blame]

1211

self._original_suite_name = original_suite_name or suite_name

Allen Li

2017-07-05 13:38:04 -0700

[diff] [blame]

1212

self._return_code_function = return_code_function

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1213

self._suite_views = []

1214

self._child_views = []

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1215

self._test_views = []

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

1216

self._retry_counts = {}

David Riley

2017-03-01 23:15:08 -0800

[diff] [blame]

1217

self._missing_results = {}

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1218

self._web_links = []

1219

self._buildbot_links = []

MK Ryu

2014-10-21 11:58:09 -0700

[diff] [blame]

1220

self._num_child_jobs = 0

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1221

self.return_code = None

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1222

self.return_message = ''

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1223

self.is_aborted = None

1224

self.timings = None

Simran Basi

2015-10-12 15:36:45 -0700

[diff] [blame]

1225

self._user = user or getpass.getuser()

Simran Basi

2015-10-14 19:05:00 -0700

[diff] [blame]

1226

self._solo_test_run = solo_test_run

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1227

1228

Allen Li

2016-09-14 19:05:47 -0700

[diff] [blame]

1229

@property
def buildbot_links(self):
    """Read-only public view of the collected buildbot links.

    @returns: The list stored in self._buildbot_links (not a copy).

    """
    links = self._buildbot_links
    return links

1233

1234

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1235

def _fetch_relevant_test_views_of_suite(self):

1236

"""Fetch relevant test views of the suite job.

1237

1238

For the suite job, there will be a test view for SERVER_JOB, and views

Allen Li

2016-09-14 19:05:47 -0700

[diff] [blame]

1239

for results of its child jobs. For example, assume we've created

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1240

a suite job (afe_job_id: 40) that runs dummy_Pass, dummy_Fail,

1241

dummy_Pass.bluetooth. Assume dummy_Pass was aborted before running while

1242

dummy_Path.bluetooth got TEST_NA as no duts have bluetooth.

1243

So the suite job's test views would look like

1244

_____________________________________________________________________

1245

1246

10 | 1000 |SERVER_JOB |---- |40 |GOOD

1247

11 | 1000 |dummy_Pass |NULL |40 |ABORT

1248

12 | 1000 |dummy_Fail.Fail |41-onwer/...|40 |FAIL

1249

13 | 1000 |dummy_Fail.Error |42-owner/...|40 |ERROR

1250

14 | 1000 |dummy_Pass.bluetooth|NULL |40 |TEST_NA

1251

1252

For a suite job, we only care about

1253

a) The test view for the suite job's SERVER_JOB

1254

b) The test views for real tests without a subdir. A NULL subdir

1255

indicates that a test didn't get executed.

1256

So, for the above example, we only keep test views whose test_idxs

1257

are 10, 11, 14.

1258

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

1259

@returns: A list of TestView objects, representing relevant

1260

test views of the suite job.

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1261

1262

"""

Fang Deng

2014-06-12 18:21:55 -0700

[diff] [blame]

1263

suite_job = self._afe.get_jobs(id=self._suite_job_id)[0]

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1264

views = self._tko.run(call='get_detailed_test_views',

1265

afe_job_id=self._suite_job_id)

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1266

relevant_views = []

1267

for v in views:

Simran Basi

2015-10-14 19:05:00 -0700

[diff] [blame]

1268

v = TestView(v, suite_job, self._suite_name, self._build, self._user,

1269

solo_test_run=self._solo_test_run)

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

1270

if v.is_relevant_suite_view():

David Riley

2017-03-01 23:15:08 -0800

[diff] [blame]

1271

# If the test doesn't have results in TKO and is being

1272

# displayed in the suite view instead of the child view,

1273

# then afe_job_id is incorrect and from the suite.

1274

# Override it based on the AFE job id which was missing

1275

# results.

1276

# TODO: This is likely inaccurate if a test has multiple

1277

# tries which all fail TKO parse stage.

1278

if v['test_name'] in self._missing_results:

1279

v.override_afe_job_id(

1280

self._missing_results[v['test_name']][0])

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1281

relevant_views.append(v)

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1282

return relevant_views

1283

1284

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

1285

def _compute_retry_count(self, view):

1286

"""Return how many times the test has been retried.

1287

1288

@param view: A TestView instance.

1289

@returns: An int value indicating the retry count.

1290

1291

"""

1292

old_job = view['job_keyvals'].get('retry_original_job_id')

count = 0

while old_job:

count += 1

views = self._tko.run(

1297

call='get_detailed_test_views', afe_job_id=old_job)

1298

old_job = (views[0]['job_keyvals'].get('retry_original_job_id')

if views else None)

return count

Simran Basi

2015-10-14 19:05:00 -0700

[diff] [blame]

1303

def _fetch_test_views_of_child_jobs(self, jobs=None):

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1304

"""Fetch test views of child jobs.

1305

David Riley

2017-03-01 23:15:08 -0800

[diff] [blame]

1306

@returns: A tuple (child_views, retry_counts, missing_results)

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

1307

child_views is list of TestView objects, representing

David Riley

2017-03-01 23:15:08 -0800

[diff] [blame]

1308

all valid views.

1309

retry_counts is a dictionary that maps test_idx to retry

1310

counts. It only stores retry counts that are greater than 0.

1311

missing_results is a dictionary that maps test names to

1312

lists of job ids.

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1313

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1314

"""

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1315

child_views = []

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

1316

retry_counts = {}

David Riley

2017-03-01 23:15:08 -0800

[diff] [blame]

1317

missing_results = {}

Simran Basi

2015-10-14 19:05:00 -0700

[diff] [blame]

1318

child_jobs = jobs or self._afe.get_jobs(parent_job_id=self._suite_job_id)

MK Ryu

2014-10-21 11:58:09 -0700

[diff] [blame]

1319

if child_jobs:

1320

self._num_child_jobs = len(child_jobs)

Fang Deng

2014-06-12 18:21:55 -0700

[diff] [blame]

1321

for job in child_jobs:

Simran Basi

2015-10-12 15:36:45 -0700

[diff] [blame]

1322

views = [TestView(v, job, self._suite_name, self._build, self._user)

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

1323

for v in self._tko.run(

Fang Deng

2014-06-12 18:21:55 -0700

[diff] [blame]

1324

call='get_detailed_test_views', afe_job_id=job.id,

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

1325

invalid=0)]

David Riley

2017-03-01 23:15:08 -0800

[diff] [blame]

1326

if len(views) == 0:

1327

missing_results.setdefault(job.name, []).append(job.id)

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1328

contains_test_failure = any(

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

1329

v.is_test() and v['status'] != 'GOOD' for v in views)

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1330

for v in views:

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

1331

if (v.is_test() or

1332

v['status'] != 'GOOD' and not contains_test_failure):

1333

# For normal test view, just keep it.

1334

# For SERVER_JOB or CLIENT_JOB, only keep it

1335

# if it fails and no other test failure.

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1336

child_views.append(v)

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

1337

retry_count = self._compute_retry_count(v)

1338

if retry_count > 0:

1339

retry_counts[v['test_idx']] = retry_count

David Riley

2017-03-01 23:15:08 -0800

[diff] [blame]

1340

return child_views, retry_counts, missing_results

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1341

1342

1343

def _generate_web_and_buildbot_links(self):

1344

"""Generate web links and buildbot links."""

1345

# TODO(fdeng): If a job was aborted before it reaches Running

1346

# state, we read the test view from the suite job

1347

# and thus this method generates a link pointing to the

1348

# suite job's page for the aborted job. Need a fix.

1349

self._web_links = []

1350

self._buildbot_links = []

1351

# Bug info are stored in the suite job's keyvals.

Simran Basi

2015-10-14 19:05:00 -0700

[diff] [blame]

1352

if self._solo_test_run:

1353

suite_job_keyvals = {}

1354

else:

1355

suite_job_keyvals = self._suite_views[0]['job_keyvals']

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1356

for v in self._test_views:

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

1357

retry_count = self._retry_counts.get(v['test_idx'], 0)

1358

bug_info = v.get_bug_info(suite_job_keyvals)

1359

job_id_owner = v.get_job_id_owner_str()

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1360

link = LogLink(

Allen Li

2016-09-02 11:52:34 -0700

[diff] [blame]

1361

anchor=v.get_testname(),

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1362

server=self._instance_server,

1363

job_string=job_id_owner,

Simran Basi

2015-02-03 15:50:18 -0800

[diff] [blame]

1364

bug_info=bug_info, retry_count=retry_count,

1365

testname=v.get_testname())

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1366

self._web_links.append(link)

1367

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

1368

if v.should_display_buildbot_link():

1369

link.reason = v.get_buildbot_link_reason()

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1370

self._buildbot_links.append(link)

1371

1372

1373

def _record_timings(self):
    """Record suite timings.

    Creates a fresh Timings object for the suite job and feeds every
    collected test view into it.

    """
    suite_timings = Timings(self._suite_job_id)
    # Publish the object before filling it, as the views are recorded
    # through self.timings.
    self.timings = suite_timings
    for view in self._test_views:
        self.timings.RecordTiming(view)

1378

1379

1380

def _compute_return_code(self):

1381

"""Compute the exit code based on test results."""

Allen Li

2017-07-05 13:38:04 -0700

[diff] [blame]

1382

return_code, message = self._return_code_function(self._test_views)

1383

self.return_code = return_code

1384

self.return_message = message

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1385

1386

Allen Li

2016-09-02 11:52:34 -0700

[diff] [blame]

1387

def _make_test_results(self):

1388

"""Make TestResults for collected tests.

1389

1390

@returns: List of TestResult instances.

1391

"""

1392

test_results = []

1393

for test_view in self._test_views:

1394

test_result = TestResult(

1395

test_view=test_view,

1396

retry_count=self._retry_counts.get(test_view['test_idx'], 0))

1397

test_results.append(test_result)

return test_results

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1401

def output_results(self):
    """Output test results, timings and web links.

    Everything is written through logging.info. Safe to call when no
    test view was collected: the results section is then simply empty.

    """
    # Output test results
    test_results = self._make_test_results()
    # max() raises ValueError on an empty sequence, so only compute the
    # column width when there is at least one result to print.
    if test_results:
        max_name_length = max(len(test_result.name)
                              for test_result in test_results)
    else:
        max_name_length = 0
    for test_result in test_results:
        test_result.log_using(logging.info, max_name_length + 3)
    # Output suite timings
    logging.info(self.timings)
    # Output links to test logs
    logging.info('\nLinks to test logs:')
    for link in self._web_links:
        logging.info(link.text_link)
    logging.info('\n')

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1416

1417

Shuqian Zhao

2015-08-05 22:56:30 -0700

[diff] [blame]

1418

def get_results_dict(self):
    """Write test results, timings and web links into a dict.

    @returns: A dict of results in the format like:
              {
              'tests': {
                    'test_1': {'status': 'PASSED', 'attributes': [1,2], ...}
                    'test_2': {'status': 'FAILED', 'attributes': [1],...}
              }
              'suite_timings': {
                    'download_start': '1998-07-17 00:00:00',
                    'payload_download_end': '1998-07-17 00:00:05',
                    ...
              }
              }
              'suite_timings' is only present when timings were recorded;
              'suite_job_id' is always present.
    """
    tests_dict = {}
    output_dict = {'tests': tests_dict}

    for view in self._test_views:
        name = view.get_testname()
        test_info = tests_dict.setdefault(name, {})
        test_info.update({
            'status': view['status'],
            'attributes': view.get_control_file_attributes() or [],
            'reason': view['reason'],
            'retry_count': self._retry_counts.get(view['test_idx'], 0),
        })
        # For aborted test, the control file will not be parsed and thus
        # fail to get the attributes info. Therefore, the subsystems the
        # abort test testing will be missing. For this case, we will assume
        # the aborted test will test all subsystems, set subsystem:default.
        if test_info['status'] != 'ABORT':
            continue
        attributes = test_info['attributes']
        if not any('subsystem:' in attr for attr in attributes):
            attributes.append('subsystem:default')

    # Write the links to test logs into the |tests_dict| of |output_dict|.
    # For test whose status is not 'GOOD', the link is also buildbot_link.
    for link in self._web_links:
        test_info = tests_dict.get(link.anchor.strip())
        if not test_info:
            continue
        test_info['link_to_logs'] = link.url
        # Write the wmatrix link into the dict.
        if link in self._buildbot_links and link.testname:
            test_info['wmatrix_link'] = reporting_utils.link_retry_url(
                    link.testname)
        # Write the bug url into the dict.
        if link.bug_id:
            test_info['bug_url'] = link.bug_url

    # Write the suite timings into |output_dict|
    timings = self.timings
    if timings is not None:
        output_dict['suite_timings'] = {
            'download_start': str(timings.download_start_time),
            'payload_download_end': str(timings.payload_end_time),
            'suite_start': str(timings.suite_start_time),
            'artifact_download_end': str(timings.artifact_end_time),
            'tests_start': str(timings.tests_start_time),
            'tests_end': str(timings.tests_end_time),
        }

    output_dict['suite_job_id'] = self._suite_job_id
    return output_dict

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1486

def run(self):
    """Collect test results.

    This method goes through the following steps:
        Fetch test views of child jobs
        Fetch relevant test views of the suite job.
        Check whether the suite was aborted.
        Generate links.
        Calculate suite timings.
        Compute return code based on the test result.

    For a solo test run the suite job itself is treated as the only
    "child" job and no suite views are fetched.

    """
    if self._solo_test_run:
        solo_jobs = self._afe.get_jobs(id=self._suite_job_id)
        (self._test_views,
         self._retry_counts,
         self._missing_results) = self._fetch_test_views_of_child_jobs(
                 jobs=solo_jobs)
    else:
        # Child views come first: fetching the suite views consults
        # self._missing_results, which is filled in here.
        (self._child_views,
         self._retry_counts,
         self._missing_results) = self._fetch_test_views_of_child_jobs()
        self._suite_views = self._fetch_relevant_test_views_of_suite()
        self._test_views = self._suite_views + self._child_views

    # For hostless job in Starting status, there is no test view associated.
    # This can happen when a suite job in Starting status is aborted. When
    # the scheduler hits some limit, e.g., max_hostless_jobs_per_drone,
    # max_jobs_started_per_cycle, a suite job can stays in Starting status.
    if not self._test_views:
        self.return_code = RETURN_CODES.INFRA_FAILURE
        self.return_message = 'No test view was found.'
        return

    aborted_by = [view['job_keyvals'].get('aborted_by')
                  for view in self._suite_views]
    self.is_aborted = any(aborted_by)
    self._generate_web_and_buildbot_links()
    self._record_timings()
    self._compute_return_code()

1520

1521

MK Ryu

2014-10-21 11:58:09 -0700

[diff] [blame]

1522

def gather_timing_stats(self):
    """Collect timing related statistics.

    Records the suite runtime (tests end time minus suite start time, in
    seconds) through job_overhead.record_suite_runtime. A runtime of -1
    is reported when the runtime cannot be computed.

    """
    # Record suite runtime in metadata db.
    # Some failure modes can leave times unassigned, report sentinel value
    # in that case.
    runtime_in_secs = -1
    # run() leaves self.timings as None when it finds no test view, so
    # the Timings object itself may be missing, not only its fields.
    timings = self.timings
    if (timings is not None and
            timings.tests_end_time is not None and
            timings.suite_start_time is not None):
        runtime_in_secs = (timings.tests_end_time -
                           timings.suite_start_time).total_seconds()

    job_overhead.record_suite_runtime(self._suite_job_id, self._suite_name,
            self._board, self._build, self._num_child_jobs, runtime_in_secs)

1535

1536

Allen Li

2017-07-05 13:38:04 -0700

[diff] [blame]

1537

Allen Li

2016-12-08 13:51:31 -0800

[diff] [blame]

1538

def _make_builds_from_options(options):

1539

"""Create a dict of builds for creating a suite job.

Prashanth B

2014-05-08 18:01:27 -0700

[diff] [blame]

1540

Allen Li

2016-12-08 13:51:31 -0800

[diff] [blame]

1541

The returned dict maps version label prefixes to build names. Together,

1542

each key-value pair describes a complete label.

Prashanth B

2014-05-08 18:01:27 -0700

[diff] [blame]

1543

Allen Li

2016-12-08 13:51:31 -0800

[diff] [blame]

1544

@param options: SimpleNamespace from argument parsing.

1545

1546

@return: dict mapping version label prefixes to build names

Prashanth B

2014-05-08 18:01:27 -0700

[diff] [blame]

1547

"""

Dan Shi

2014-10-10 13:38:51 -0700

[diff] [blame]

1548

builds = {}

1549

if options.build:

Richard Barnette

6c2b70a

2017-01-26 13:40:51 -0800

[diff] [blame]

1550

prefix = provision.get_version_label_prefix(options.build)

1551

builds[prefix] = options.build

Dan Shi

0723bf5

2015-06-24 10:52:38 -0700

[diff] [blame]

1552

if options.firmware_rw_build:

1553

builds[provision.FW_RW_VERSION_PREFIX] = options.firmware_rw_build

Dan Shi

2014-10-10 13:38:51 -0700

[diff] [blame]

1554

if options.firmware_ro_build:

1555

builds[provision.FW_RO_VERSION_PREFIX] = options.firmware_ro_build

Allen Li

2016-12-08 13:51:31 -0800

[diff] [blame]

return builds

@retry.retry(error.StageControlFileFailure, timeout_min=10)
def create_suite(afe, options):
    """Create a suite with retries.

    The 'create_suite_job' rpc is retried (per the decorator) when it
    raises error.StageControlFileFailure.

    @param afe: The afe object to insert the new suite job into.
    @param options: The options to use in creating the suite.

    @return: The afe_job_id of the new suite job.
    """
    submitted_at = diagnosis_utils.JobTimer.format_time(datetime.now())
    logging.info('%s Submitted create_suite_job rpc', submitted_at)

    # The requested delay is added on top of both time limits.
    suite_job_args = {
        'name': options.name,
        'board': options.board,
        'builds': _make_builds_from_options(options),
        'test_source_build': options.test_source_build,
        'check_hosts': not options.no_wait,
        'pool': options.pool,
        'num': options.num,
        'file_bugs': options.file_bugs,
        'priority': options.priority,
        'suite_args': options.suite_args,
        'wait_for_results': not options.no_wait,
        'timeout_mins': options.timeout_mins + options.delay_minutes,
        'max_runtime_mins': (options.max_runtime_mins +
                             options.delay_minutes),
        'job_retry': options.retry,
        'max_retries': options.max_retries,
        'suite_min_duts': options.suite_min_duts,
        'offload_failures_only': options.offload_failures_only,
        'run_prod_code': options.run_prod_code,
        'delay_minutes': options.delay_minutes,
        'job_keyvals': options.job_keyvals,
        'test_args': options.test_args,
    }
    return afe.run('create_suite_job', **suite_job_args)

Prashanth B

2014-05-08 18:01:27 -0700

[diff] [blame]

1594

1595

Allen Li

2017-07-10 14:58:16 -0700

[diff] [blame]

1596

class SuiteResult(namedtuple('SuiteResult', ['return_code', 'output_dict'])):
    """Result of running a suite to return.

    Behaves like a (return_code, output_dict) tuple. The stored
    output_dict is a shallow copy of the one passed in, extended with a
    'return_code' entry, so the caller's dict is never modified.
    """

    def __new__(cls, return_code, output_dict):
        """Build the result tuple.

        @param return_code: The exit code of the suite run.
        @param output_dict: Dict of results; copied, not mutated.
        """
        output_dict = output_dict.copy()
        # The key must match the field name; it was misspelled before.
        output_dict['return_code'] = return_code
        return super(SuiteResult, cls).__new__(cls, return_code, output_dict)

Allen Li

2016-08-16 14:19:08 -0700

[diff] [blame]

1603

1604

Allen Li

2017-07-05 14:24:18 -0700

[diff] [blame]

1605

def _run_suite(options):
    """
    run_suite script without exception handling.

    Either attaches to an existing job (options.mock_job_id) or creates a
    new suite job, then hands the job over to the no-wait or the waiting
    handler depending on options.no_wait.

    @param options: The parsed options.

    @returns: A tuple contains the return_code of run_suite and the dictionary
              of the output.

    """
    # If indicate to use the new style suite control file, convert the args
    if options.use_suite_attr:
        options = change_options_for_suite_attr(options)

    utils.setup_logging(logfile=_get_log_name(options))

    if not (options.bypass_labstatus or options.web):
        utils.check_lab_status(options.build)

    afe = _create_afe(options)
    instance_server = afe.server

    rpc_helper = diagnosis_utils.RPCHelper(afe)
    is_real_time = True
    if options.mock_job_id:
        job_id = int(options.mock_job_id)
        # A job that is already finished is not followed in real time.
        found_jobs = afe.get_jobs(id=job_id, finished=True)
        if found_jobs:
            is_real_time = False
        else:
            found_jobs = afe.get_jobs(id=job_id)

        if not found_jobs:
            raise utils.TestLabException('Failed to retrieve job: %d' % job_id)
        job_created_on = time_utils.date_string_to_epoch_time(
                found_jobs[0].created_on)
    else:
        try:
            rpc_helper.check_dut_availability(options.board, options.pool,
                                              options.minimum_duts,
                                              options.skip_duts_check)
            job_id = create_suite(afe, options)
            job_created_on = time.time()
        except diagnosis_utils.NotEnoughDutsError as e:
            # Enrich the error and report a pool health bug before
            # letting it propagate.
            e.add_suite_name(options.name)
            e.add_build(options.test_source_build)
            pool_health_bug = reporting.PoolHealthBug(e)
            bug_id = reporting.Reporter().report(pool_health_bug).bug_id
            if bug_id is not None:
                bug_link = annotations.StepLink(
                    text='Pool Health Bug',
                    url=reporting_utils.link_crbug(bug_id))
                logging.info(bug_link)
                e.add_bug_id(bug_id)
            raise e
        except (error.CrosDynamicSuiteException,
                error.RPCException, proxy.JSONRPCException) as e:
            logging.exception('Error Message: %s', e)
            return (RETURN_CODES.INFRA_FAILURE, {'return_message': str(e)})
        except AttributeError:
            return (RETURN_CODES.INVALID_OPTIONS, {})

    job_timer = diagnosis_utils.JobTimer(
            job_created_on, float(options.timeout_mins))
    job_url = reporting_utils.link_job(job_id,
                                       instance_server=instance_server)
    created_at = job_timer.format_time(job_timer.job_created_time)
    logging.info('%s Created suite job: %s', created_at, job_url)
    logging.info(annotations.StepLink(
        text='Link to suite',
        url=job_url))

    if options.create_and_return:
        message = '--create_and_return was specified, terminating now.'
        logging.info(message)
        return (RETURN_CODES.OK, {'return_message': message})

    if options.no_wait:
        return _handle_job_nowait(job_id, options, instance_server)
    return _handle_job_wait(afe, job_id, options, job_timer, is_real_time)

Allen Li

2016-08-16 14:19:08 -0700

[diff] [blame]

1687

1688

xixuan

99eba0b

2017-07-12 15:10:01 -0700

[diff] [blame]

1689

def _get_log_name(options):

1690

"""Return local log file's name.

1691

1692

@param options: Parsed options.

1693

1694

@return log_name, a string file name.

1695

"""

1696

if options.require_logfile:

1697

# options.build is verified to exist in verify_options.

1698

# convert build name from containing / to containing only _.

1699

log_name = 'run_suite-%s.log' % options.build.replace('/', '_')

1700

log_dir = os.path.join(common.autotest_dir, 'logs')

1701

if os.path.exists(log_dir):

1702

log_name = os.path.join(log_dir, log_name)

return log_name

else:

return None

xixuan

2017-06-29 15:40:19 -0700

[diff] [blame]

1709

def _create_afe(options):
    """Return an afe instance based on options.

    The server is options.web when set, otherwise the instance looked up
    for options.pool.

    @param options Parsed options.

    @return afe, an AFE instance.
    """
    if options.web:
        instance_server = options.web
    else:
        instance_server = instance_for_pool(options.pool)

    afe = frontend_wrappers.RetryingAFE(
            server=instance_server,
            timeout_min=options.afe_timeout_mins,
            delay_sec=options.delay_sec)
    logging.info('Autotest instance created: %s', instance_server)
    return afe

Allen Li

2016-08-16 14:19:08 -0700

[diff] [blame]

1725

def _handle_job_wait(afe, job_id, options, job_timer, is_real_time):
    """Handle suite job synchronously.

    Polls the AFE every 10 seconds until the suite job is reported as
    finished, then collects the test results, computes the return code and
    logs the buildbot links.

    @param afe AFE instance.
    @param job_id Suite job id.
    @param options Parsed options.
    @param job_timer JobTimer for suite job.
    @param is_real_time Whether or not to handle job timeout.

    @return SuiteResult of suite job.
    """
    rpc_helper = diagnosis_utils.RPCHelper(afe)
    instance_server = afe.server
    while not afe.get_jobs(id=job_id, finished=True):
        _poke_buildbot_with_output(afe, job_id, job_timer)
        if job_timer.debug_output_timer.poll():
            logging.info('The suite job has another %s till timeout.',
                         job_timer.timeout_hours - job_timer.elapsed_time())
        time.sleep(10)
    logging.info('%s Suite job is finished.',
                 diagnosis_utils.JobTimer.format_time(datetime.now()))
    # For most cases, ResultCollector should be able to determine whether
    # a suite has timed out by checking information in the test view.
    # However, occasionally tko parser may fail on parsing the
    # job_finished time from the job's keyval file. So we add another
    # layer of timeout check in run_suite. We do the check right after
    # the suite finishes to make it as accurate as possible.
    # There is a minor race condition here where we might have aborted
    # for some reason other than a timeout, and the job_timer thinks
    # it's a timeout because of the jitter in waiting for results.
    # The consequence would be that run_suite exits with code
    # SUITE_TIMEOUT while it should have returned INFRA_FAILURE
    # instead, which should happen very rarely.
    # Note the timeout is meaningless when using the -m option.
    is_suite_timeout = job_timer.is_suite_timeout()

    # Extract the original suite name to record timing.
    original_suite_name = get_original_suite_name(options.name,
                                                  options.suite_args)
    # Start collecting test results.
    logging.info('%s Start collecting test results and dump them to json.',
                 diagnosis_utils.JobTimer.format_time(datetime.now()))
    tko = frontend_wrappers.RetryingTKO(server=instance_server,
                                        timeout_min=options.afe_timeout_mins,
                                        delay_sec=options.delay_sec)
    collector = ResultCollector(
            instance_server=instance_server,
            afe=afe, tko=tko, build=options.build,
            board=options.board,
            suite_name=options.name,
            suite_job_id=job_id,
            # TODO(ayatane): It needs to be possible
            # for provision suite to pass if only a
            # few tests fail. Otherwise, a single
            # failing test will be reported as
            # failure even if the suite reports
            # success.
            return_code_function=_ReturnCodeComputer(
                    ignore_test_results=(options.name == 'provision'),
            ),
            original_suite_name=original_suite_name)
    collector.run()
    # Dump test outputs into json.
    output_dict = collector.get_results_dict()
    output_dict['autotest_instance'] = instance_server
    if not options.json_dump:
        collector.output_results()
    code = collector.return_code
    return_message = collector.return_message
    if is_real_time:
        # Do not record stats if the suite was aborted (either by a user
        # or through the golo rpc).
        # Also do not record stats if is_aborted is None, indicating
        # aborting status is unknown yet.
        # is_aborted is tri-state (None/True/False), so compare by
        # identity rather than by truthiness or equality.
        if collector.is_aborted is False:
            logging.info('%s Gathering timing stats for the suite job.',
                         diagnosis_utils.JobTimer.format_time(datetime.now()))
            collector.gather_timing_stats()

        if collector.is_aborted is True and is_suite_timeout:
            # There are two possible cases when a suite times out.
            # 1. the suite job was aborted due to timing out
            # 2. the suite job succeeded, but some child jobs
            #    were already aborted before the suite job exited.
            # The case 2 was handled by ResultCollector,
            # here we handle case 1.
            old_code = code
            code = get_worse_code(
                    code, RETURN_CODES.SUITE_TIMEOUT)
            if old_code != code:
                return_message = 'Suite job timed out.'
                logging.info('Upgrade return code from %s to %s '
                             'because suite job has timed out.',
                             RETURN_CODES.get_string(old_code),
                             RETURN_CODES.get_string(code))

        logging.info('\n %s Attempting to display pool info: %s',
                     diagnosis_utils.JobTimer.format_time(datetime.now()),
                     options.pool)
        try:
            # Add some jitter to make up for any latency in
            # aborting the suite or checking for results.
            cutoff = job_timer.timeout_hours + timedelta(hours=0.3)
            rpc_helper.diagnose_pool(
                    options.board, options.pool, cutoff)
        except proxy.JSONRPCException:
            # Pool diagnosis is best effort; the suite result still stands.
            logging.warning('Unable to display pool info.')

    # And output return message.
    if return_message:
        logging.info('Reason: %s', return_message)
        output_dict['return_message'] = return_message

    logging.info('\n %s Output below this line is for buildbot consumption:',
                 diagnosis_utils.JobTimer.format_time(datetime.now()))
    log_buildbot_links(logging.info, collector._buildbot_links)
    return SuiteResult(code, output_dict)

Prashanth B

2014-03-14 12:36:29 -0700

[diff] [blame]

1842

Allen Li

2016-08-16 14:19:08 -0700

[diff] [blame]

1843

1844

def _handle_job_nowait(job_id, options, instance_server):
    """Handle suite job asynchronously.

    Logs the buildbot links for the freshly created suite job and returns
    immediately with an OK result instead of waiting for completion.

    @param job_id Suite job id.
    @param options Parsed options.
    @param instance_server Autotest instance hostname.

    @return SuiteResult of suite job.
    """
    logging.info('Created suite job: %r', job_id)

    job_tag = '%s-%s' % (job_id, getpass.getuser())
    suite_link = LogLink(options.name, instance_server, job_tag)
    for buildbot_link in suite_link.GenerateBuildbotLinks():
        logging.info(buildbot_link)

    exit_note = '--no_wait specified; Exiting.'
    logging.info(exit_note)
    return SuiteResult(RETURN_CODES.OK, {'return_message': exit_note})

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

1861

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1862

xixuan

2017-06-29 15:40:19 -0700

[diff] [blame]

1863

def _should_run(options):
    """Check whether the suite should be run based on lab/job status checking.

    @param options Parsed options.

    @return False when the lab status check raises TestLabException, or
            when the AFE already lists a job for this build and suite
            created within the last _SEARCH_JOB_MAX_DAYS days; True
            otherwise.
    """
    build = options.test_source_build
    try:
        site_utils.check_lab_status(build)
    except site_utils.TestLabException as ex:
        logging.exception('Lab is closed or build is blocked. Skipping '
                          'suite %s, board %s, build %s: %s',
                          options.name, options.board,
                          options.test_source_build, str(ex))
        return False

    # Only jobs created after this point in time count as duplicates.
    search_window = timedelta(days=_SEARCH_JOB_MAX_DAYS)
    earliest_created = str(datetime.now() - search_window)
    afe = _create_afe(options)
    previous_jobs = afe.get_jobs(
            name__istartswith=options.test_source_build,
            name__iendswith='control.' + options.name,
            created_on__gte=earliest_created,
            min_rpc_timeout=_MIN_RPC_TIMEOUT)
    return not previous_jobs

1885

Shuqian Zhao

2015-08-05 22:56:30 -0700

[diff] [blame]

1886

Allen Li

2017-07-10 15:14:20 -0700

[diff] [blame^]

1887

def _poke_buildbot_with_output(afe, job_id, job_timer):
    """Emit job diagnosis output so buildbot does not time out from silence.

    @param afe AFE instance.
    @param job_id Suite job id.
    @param job_timer JobTimer for suite job.
    """
    helper = diagnosis_utils.RPCHelper(afe)
    if not job_timer.first_past_halftime():
        return
    # Note that this call logs output, preventing buildbot's
    # 9000 second silent timeout from kicking in. Let there be no
    # doubt, this is a hack. The timeout is from upstream buildbot and
    # this is the easiest work around.
    helper.diagnose_job(job_id, afe.server)

Allen Li

2017-07-10 14:58:16 -0700

[diff] [blame]

1904

def _run_task(options):
    """Perform this script's function minus setup.

    Boilerplate like argument parsing, logging, output formatting happen
    elsewhere.

    Runs the suite via _run_suite() and converts the two exceptions handled
    here into SuiteResult instances instead of letting them propagate:
    BoardNotAvailableError becomes BOARD_NOT_AVAILABLE and TestLabException
    becomes INFRA_FAILURE.

    @param options Parsed options.

    Returns a SuiteResult instance.

    TODO(ayatane): The try/except should be moved into _run_suite().
    Good luck trying to figure out which function calls are supposed to
    raise which of the exceptions.
    """
    try:
        return _run_suite(options)
    except diagnosis_utils.BoardNotAvailableError as e:
        # Format the exception itself rather than the deprecated
        # e.message attribute, matching the TestLabException branch below.
        result = SuiteResult(
                RETURN_CODES.BOARD_NOT_AVAILABLE,
                {'return_message': 'Skipping testing: %s' % e})
        logging.info(result.output_dict['return_message'])
        return result
    except utils.TestLabException as e:
        result = SuiteResult(
                RETURN_CODES.INFRA_FAILURE,
                {'return_message': 'TestLabException: %s' % e})
        logging.exception(result.output_dict['return_message'])
        return result

class _ExceptionHandler(object):
    """Global exception handler replacement.

    Instances are callables taking (exc_type, value, traceback), the same
    arguments the interpreter passes to sys.excepthook.
    """

    def __init__(self, dump_json):
        """Initialize instance.

        @param dump_json: Whether to print a JSON dump of the result dict to
                          stdout.
        """
        self._should_dump_json = dump_json

    def __call__(self, exc_type, value, traceback):
        """Report an unhandled exception and exit with INFRA_FAILURE.

        @param exc_type: Class of the unhandled exception.
        @param value: The unhandled exception instance.
        @param traceback: Traceback object of the unhandled exception.
        """
        if self._should_dump_json:
            _dump_json({'return_message': ('Unhandled run_suite exception: %s'
                                           % value)})
        else:
            # Outside JSON mode nothing else reports the failure, so let
            # the interpreter's default hook print the traceback to stderr
            # before exiting.
            sys.__excepthook__(exc_type, value, traceback)
        sys.exit(RETURN_CODES.INFRA_FAILURE)

Allen Li

2017-07-05 14:24:18 -0700

[diff] [blame]

def main():
    """Entry point.

    Parses and verifies the command line options, runs the suite unless the
    options are invalid or the pre-check says to skip it, optionally dumps
    the result dict as JSON to stdout, and returns the result's return code.

    @return An integer return code: 0 when options.do_nothing is set,
            otherwise the return_code of the computed SuiteResult.
    """
    utils.verify_not_root_user()

    parser = make_parser()
    options = parser.parse_args()
    if options.do_nothing:
        return 0

    # sys.excepthook is the hook the interpreter actually calls for
    # uncaught exceptions; assigning to any other attribute of sys leaves
    # the handler uninstalled.
    sys.excepthook = _ExceptionHandler(dump_json=options.json_dump)
    if options.json_dump:
        # Silence logging output when the result is dumped as JSON.
        logging.disable(logging.CRITICAL)

    options_okay = verify_and_clean_options(options)
    if not options_okay:
        parser.print_help()
        result = SuiteResult(
                RETURN_CODES.INVALID_OPTIONS,
                {'return_code': RETURN_CODES.INVALID_OPTIONS})
    elif options.pre_check and not _should_run(options):
        logging.info('Lab is closed, OR build %s is blocked, OR suite '
                     '%s for this build has already been kicked off '
                     'once in past %d days.',
                     options.test_source_build, options.name,
                     _SEARCH_JOB_MAX_DAYS)
        result = SuiteResult(
                RETURN_CODES.ERROR,
                {'return_message': ("Lab is closed OR other reason"
                                    " (see code, it's complicated)")})
    else:
        result = _run_task(options)

    if options.json_dump:
        _dump_json(result.output_dict)

    logging.info('Will return from run_suite with status: %s',
                 RETURN_CODES.get_string(result.return_code))
    return result.return_code

def _dump_json(obj):
    """Serialize obj to JSON and write it to stdout between marker tags.

    Keys are sorted, and the payload is wrapped as
    #JSON_START#<json>#JSON_END# with no trailing newline.

    @param obj: JSON-serializable object to dump.
    """
    payload = json.dumps(obj, sort_keys=True).strip()
    framed = '#JSON_START#%s#JSON_END#' % payload
    sys.stdout.write(framed)

Fang Deng

fb4a949

2014-09-18 17:52:06 -0700

[diff] [blame]

1994

1995

Chris Masone