Blame - site_utils/run_suite.py - platform/external/autotest

2012-02-14 14:18:01 -0800

[diff] [blame]

#!/usr/bin/python

#

# Use of this source code is governed by a BSD-style license that can be

5

# found in the LICENSE file.

6

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

7

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

8

"""Tool for running suites of tests and waiting for completion.

9

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

10

The desired test suite will be scheduled with autotest. By default,

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

11

this tool will block until the job is complete, printing a summary

12

at the end. Error conditions result in exceptions.

13

14

This is intended for use only with Chrome OS test suites that leverage the

15

dynamic suite infrastructure in server/cros/dynamic_suite.py.

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

16

17

This script exits with one of the following codes:

18

0 - OK: Suite finished successfully

19

1 - ERROR: Test(s) failed, or hits its own timeout

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

20

2 - WARNING: Test(s) raised a warning or passed on retry, none failed/timed out.

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

21

3 - INFRA_FAILURE: Infrastructure related issues, e.g.

22

* Lab is down

23

* Too many duts (defined as a constant) in repair failed status

24

* Suite job issues, like bug in dynamic suite,

25

user aborted the suite, lose a drone/all devservers/rpc server,

26

0 tests ran, etc.

Fang Deng

2014-09-12 14:16:11 -0700

[diff] [blame]

27

* provision failed

28

TODO(fdeng): crbug.com/413918, reexamine treating all provision

29

failures as INFRA failures.

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

30

4 - SUITE_TIMEOUT: Suite timed out, some tests ran,

31

none failed by the time the suite job was aborted. This will cover,

32

but not limited to, the following cases:

33

* A devserver failure that manifests as a timeout

34

* No DUTs available midway through a suite

35

* Provision/Reset/Cleanup took longer time than expected for new image

36

* A regression in scheduler tick time.

Fang Deng

2014-09-25 10:18:48 -0700

[diff] [blame]

37

5- BOARD_NOT_AVAILABLE: If there is no host for the requested board/pool.

38

6- INVALID_OPTIONS: If options are not valid.

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

39

"""

40

Allen Li

2016-09-14 14:44:59 -0700

[diff] [blame]

41

import argparse

42

import ast

Allen Li

2016-08-16 14:19:08 -0700

[diff] [blame]

43

from collections import namedtuple

Chris Masone

2012-09-06 16:00:07 -0700

[diff] [blame]

44

from datetime import datetime

Allen Li

2016-09-14 14:44:59 -0700

[diff] [blame]

45

from datetime import timedelta

import getpass

import json

import logging

import os

import re

import sys

import time

Chris Masone

2012-09-06 16:00:07 -0700

[diff] [blame]

53

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

54

import common

Allen Li

2016-09-14 15:19:20 -0700

[diff] [blame]

55

from chromite.lib import buildbot_annotations as annotations

56

Shuqian Zhao

2015-08-05 22:56:30 -0700

[diff] [blame]

57

from autotest_lib.client.common_lib import control_data

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

58

from autotest_lib.client.common_lib import error

J. Richard Barnette

3cbd76b

2013-11-27 12:11:25 -0800

[diff] [blame]

59

from autotest_lib.client.common_lib import global_config, enum

60

from autotest_lib.client.common_lib import priorities

Dan Shi

2014-08-10 23:38:40 -0700

[diff] [blame]

61

from autotest_lib.client.common_lib import time_utils

Gabe Black

2015-02-04 23:55:15 -0800

[diff] [blame]

62

from autotest_lib.client.common_lib.cros.graphite import autotest_stats

Prashanth B

2014-05-08 18:01:27 -0700

[diff] [blame]

63

from autotest_lib.client.common_lib.cros import retry

Prashanth B

2014-03-14 12:36:29 -0700

[diff] [blame]

64

from autotest_lib.frontend.afe.json_rpc import proxy

J. Richard Barnette

3cbd76b

2013-11-27 12:11:25 -0800

[diff] [blame]

65

from autotest_lib.server import utils

Dan Shi

2014-10-10 13:38:51 -0700

[diff] [blame]

66

from autotest_lib.server.cros import provision

Chris Masone

44e4d6c

2012-08-15 14:25:53 -0700

[diff] [blame]

67

from autotest_lib.server.cros.dynamic_suite import constants

Chris Masone

b493555

2012-08-14 12:05:54 -0700

[diff] [blame]

68

from autotest_lib.server.cros.dynamic_suite import frontend_wrappers

Allen Li

2016-08-18 12:09:32 -0700

[diff] [blame]

69

from autotest_lib.server.cros.dynamic_suite import reporting

Prashanth B

2014-03-14 12:36:29 -0700

[diff] [blame]

70

from autotest_lib.server.cros.dynamic_suite import reporting_utils

J. Richard Barnette

e7b98bb

2013-08-21 16:34:16 -0700

[diff] [blame]

71

from autotest_lib.server.cros.dynamic_suite import tools

Prashanth B

2014-03-14 12:36:29 -0700

[diff] [blame]

72

from autotest_lib.site_utils import diagnosis_utils

MK Ryu

2014-10-21 11:58:09 -0700

[diff] [blame]

73

from autotest_lib.site_utils import job_overhead

74

Chris Masone

1120cdf

2012-02-27 17:35:07 -0800

[diff] [blame]

75

# Shared handle on the autotest global configuration.
CONFIG = global_config.global_config

# Value of the 'hostname' key in the SERVER section of the global config.
_DEFAULT_AUTOTEST_INSTANCE = CONFIG.get_config_value(
        'SERVER', 'hostname', type=str)
# Value of the 'log_url_pattern' key in the CROS section; LogLink fills it in
# with a (server, job_string) pair using the % operator.
_URL_PATTERN = CONFIG.get_config_value('CROS', 'log_url_pattern', type=str)

# Return code that will be sent back to autotest_rpc_server.py
RETURN_CODES = enum.Enum(
        'OK', 'ERROR', 'WARNING', 'INFRA_FAILURE', 'SUITE_TIMEOUT',
        'BOARD_NOT_AVAILABLE', 'INVALID_OPTIONS')
# The severity of return code. If multiple codes
# apply, the script should always return the severest one.
# E.g. if we have a test failure and the suite also timed out,
# we should return 'ERROR'.
# NOTE(review): BOARD_NOT_AVAILABLE and INVALID_OPTIONS have no entry in this
# table, so looking either of them up here raises KeyError -- confirm that
# those two codes are never passed through a severity comparison.
SEVERITY = {RETURN_CODES.OK: 0,
            RETURN_CODES.WARNING: 1,
            RETURN_CODES.SUITE_TIMEOUT: 2,
            RETURN_CODES.INFRA_FAILURE: 3,
            RETURN_CODES.ERROR: 4}

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

94

95

96

def get_worse_code(code1, code2):
    """Pick whichever of two return codes is the more severe.

    Severity is looked up in the SEVERITY table. When both codes are
    equally severe, code1 is returned.

    @param code1: An enum value of RETURN_CODES
    @param code2: An enum value of RETURN_CODES

    @returns: code2 if it is strictly more severe than code1, else code1.

    """
    first_severity = SEVERITY[code1]
    second_severity = SEVERITY[code2]
    if first_severity < second_severity:
        return code2
    return code1

Simran Basi

22aa9fe

2012-12-07 16:37:09 -0800

[diff] [blame]

106

Chris Masone

dfa0beba

2012-03-19 11:41:47 -0700

[diff] [blame]

107

Allen Li

2016-09-14 14:44:59 -0700

[diff] [blame]

108

def bool_str(x):
    """Parse the exact strings 'True' / 'False' into a bool.

    Meant to be used as an argparse `type=` callable for options that
    must be given an explicit boolean value.

    @param x: string representation of boolean value.

    @returns: True for 'True', False for 'False'.
    @raises argparse.ArgumentTypeError: for any other value.
    """
    for literal, value in (('True', True), ('False', False)):
        if x == literal:
            return value
    raise argparse.ArgumentTypeError(
            '%s is not one of True or False' % (x,))

121

122

Allen Li

603728a

2016-12-08 13:58:11 -0800

[diff] [blame]

123

def _get_priority_value(x):
    """Coerce a priority given on the command line to its int value.

    A priority may be written either as an integer (possibly still a
    string) or as the name of a level known to priorities.Priority.
    The integer interpretation is tried first; the name lookup is the
    fallback.

    This function is intended for casting command line arguments during
    parsing.

    @param x: priority value as an int, int string, or name string

    @returns: int value of priority
    @raises argparse.ArgumentTypeError: if x is neither an int literal nor
            a name priorities.Priority.get_value() accepts.
    """
    try:
        return int(x)
    except ValueError:
        # Not a numeric literal; fall through to the name lookup below.
        pass

    try:
        return priorities.Priority.get_value(x)
    except AttributeError:
        known_levels = ', '.join(priorities.Priority.names)
        raise argparse.ArgumentTypeError(
                'Unknown priority level %s. Try one of %s.'
                % (x, known_levels))

145

146

Allen Li

2016-09-14 14:44:59 -0700

[diff] [blame]

147

def make_parser():
    """Make ArgumentParser instance for run_suite.py.

    Options typed with bool_str must be given an explicit "True" or
    "False" value on the command line; the other boolean options are plain
    store_true flags.

    @returns: the configured argparse.ArgumentParser.
    """
    parser = argparse.ArgumentParser(
            usage="%(prog)s [options]")
    parser.add_argument("-b", "--board", dest="board")
    parser.add_argument("-i", "--build", dest="build")
    parser.add_argument(
            "-w", "--web", dest="web", default=None,
            help="Address of a webserver to receive suite requests.")
    parser.add_argument(
            '--firmware_rw_build', dest='firmware_rw_build', default=None,
            help='Firmware build to be installed in dut RW firmware.')
    parser.add_argument(
            '--firmware_ro_build', dest='firmware_ro_build', default=None,
            help='Firmware build to be installed in dut RO firmware.')
    parser.add_argument(
            '--test_source_build', dest='test_source_build', default=None,
            help=('Build that contains the test code, '
                  'e.g., it can be the value of `--build`, '
                  '`--firmware_rw_build` or `--firmware_ro_build` '
                  'arguments. Default is None, that is, use the test '
                  'code from `--build` (CrOS image)'))
    # This should just be a boolean flag, but the autotest "proxy" code
    # can't handle flags that don't take arguments.
    parser.add_argument(
            "-n", "--no_wait", dest="no_wait", default=False, type=bool_str,
            help='Must pass "True" or "False" if used.')
    # If you really want no pool, --pool="" will do it. USE WITH CARE.
    parser.add_argument("-p", "--pool", dest="pool", default="suites")
    parser.add_argument("-s", "--suite_name", dest="name")
    parser.add_argument("-a", "--afe_timeout_mins", type=int,
                        dest="afe_timeout_mins", default=30)
    parser.add_argument("-t", "--timeout_mins", type=int,
                        dest="timeout_mins", default=1440)
    parser.add_argument("-x", "--max_runtime_mins", type=int,
                        dest="max_runtime_mins", default=1440)
    parser.add_argument("-d", "--delay_sec", type=int,
                        dest="delay_sec", default=10)
    parser.add_argument("-m", "--mock_job_id", dest="mock_job_id",
                        help="Attach to existing job id for already running "
                        "suite, and creates report.")
    # NOTE(akeshet): This looks similar to --no_wait, but behaves differently.
    # --no_wait is passed in to the suite rpc itself and affects the suite,
    # while this does not.
    parser.add_argument("-c", "--create_and_return", dest="create_and_return",
                        action="store_true",
                        help="Create the suite and print the job id, then "
                        "finish immediately.")
    parser.add_argument("-u", "--num", dest="num", type=int, default=None,
                        help="Run on at most NUM machines.")
    # Same boolean flag issue applies here.
    parser.add_argument(
            "-f", "--file_bugs", dest="file_bugs", default=False, type=bool_str,
            help=('File bugs on test failures. Must pass "True" or '
                  '"False" if used.'))
    parser.add_argument("-l", "--bypass_labstatus", dest="bypass_labstatus",
                        action="store_true", help='Bypass lab status check.')
    # We allow either a number or a string for the priority. This way, if you
    # know what you're doing, one can specify a custom priority level between
    # other levels.
    parser.add_argument("-r", "--priority", dest="priority",
                        type=_get_priority_value,
                        default=priorities.Priority.DEFAULT,
                        action="store",
                        help="Priority of suite. Either numerical value, or "
                        "one of (" + ", ".join(priorities.Priority.names)
                        + ").")
    parser.add_argument(
            '--retry', dest='retry', default=False, type=bool_str, action='store',
            help='Enable test retry. Must pass "True" or "False" if used.')
    # The two help fragments are joined by implicit string concatenation, so
    # the first one needs its trailing space ("retries allowed", not
    # "retriesallowed").
    parser.add_argument('--max_retries', dest='max_retries', default=None,
                        type=int, action='store', help='Maximum retries '
                        'allowed at suite level. No limit if not specified.')
    parser.add_argument('--minimum_duts', dest='minimum_duts', type=int,
                        default=0, action='store',
                        help='Check that the pool has at least such many '
                        'healthy machines, otherwise suite will not run. '
                        'Default to 0.')
    parser.add_argument('--suite_min_duts', dest='suite_min_duts', type=int,
                        default=0, action='store',
                        help='Preferred minimum number of machines. Scheduler '
                        'will prioritize on getting such many machines for '
                        'the suite when it is competing with another suite '
                        'that has a higher priority but already got minimum '
                        'machines it needs. Default to 0.')
    parser.add_argument("--suite_args", dest="suite_args",
                        default=None, action="store",
                        help="Argument string for suite control file.")
    parser.add_argument('--offload_failures_only',
                        dest='offload_failures_only', type=bool_str,
                        action='store', default=False,
                        help='Only enable gs_offloading for failed tests. '
                        'Successful tests will be deleted. Must pass "True"'
                        ' or "False" if used.')
    parser.add_argument('--use_suite_attr', dest='use_suite_attr',
                        action='store_true', default=False,
                        help='Advanced. Run the suite based on ATTRIBUTES of '
                        'control files, rather than SUITE.')
    parser.add_argument('--json_dump', dest='json_dump', action='store_true',
                        default=False,
                        help='Dump the output of run_suite to stdout.')
    parser.add_argument(
            '--run_prod_code', dest='run_prod_code',
            action='store_true', default=False,
            help='Run the test code that lives in prod aka the test '
            'code currently on the lab servers.')
    parser.add_argument(
            '--delay_minutes', type=int, default=0,
            help=('Delay the creation of test jobs for a given '
                  'number of minutes. This argument can be used to '
                  'force provision jobs being delayed, which helps '
                  'to distribute loads across devservers.'))
    parser.add_argument(
            '--skip_duts_check', dest='skip_duts_check', action='store_true',
            default=False, help='If True, skip minimum available DUTs check')
    return parser

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

263

264

Allen Li

2016-09-14 14:44:59 -0700

[diff] [blame]

265

def verify_options(options):
    """Verify the validity of options.

    Each failed check prints a one-line explanation to stdout and makes the
    function return False. On success, options.test_source_build is filled
    in from options.build when it was not given explicitly.

    @param options: The parsed options to verify.

    @returns: True if verification passes, False otherwise.

    """
    # Every print below passes exactly one parenthesized string, which
    # behaves the same as a Python 2 print statement and as the Python 3
    # print function.
    if options.mock_job_id and (
            not options.build or not options.name or not options.board):
        print('When using -m, need to specify build, board and suite '
              'name which you have used for creating the original job')
        return False
    if not options.build:
        print('Need to specify which build to use')
        return False
    if not options.board:
        print('Need to specify board')
        return False
    if not options.name:
        print('Need to specify suite name')
        return False
    if options.num is not None and options.num < 1:
        print('Number of machines must be more than 0, if specified.')
        return False
    if not options.retry and options.max_retries is not None:
        print('max_retries can only be used with --retry=True')
        return False
    if options.use_suite_attr and options.suite_args is not None:
        # The two fragments are joined by implicit concatenation; the first
        # one needs a trailing space so the sentences do not run together.
        print('The new suite control file cannot parse the suite_args: %s. '
              'Please do not specify any suite_args here.'
              % options.suite_args)
        return False
    if options.no_wait and options.retry:
        # NOTE(review): this combination only prints a message and is not
        # rejected -- confirm that is intended before turning it into a
        # failure, since existing callers may rely on it passing.
        print('Test retry is not available when using --no_wait=True')
    # Default to use the test code in CrOS build.
    if not options.test_source_build and options.build:
        options.test_source_build = options.build
    return True

Shuqian Zhao

2015-06-02 11:12:28 -0700

[diff] [blame]

306

def change_options_for_suite_attr(options):
    """Change options to be prepared to run the suite_attr_wrapper.

    If specify 'use_suite_attr' from the cmd line, it indicates to run the
    new style suite control file, suite_attr_wrapper. Then, change the
    options.name to 'suite_attr_wrapper', change the options.suite_args to
    include the arguments needed by suite_attr_wrapper.

    The options object is modified in place and also returned. Any previous
    value of options.suite_args is overwritten.

    @param options: The verified options.

    @returns: The changed options.

    """
    # Convert the suite_name to attribute boolean expression.
    # isinstance (rather than an exact type comparison) so that a str
    # subclass is treated as one suite name instead of being iterated
    # character by character.
    if isinstance(options.name, str):
        attr_filter_val = 'suite:%s' % options.name
    else:
        attr_filter_val = ' or '.join(['suite:%s' % x for x in options.name])

    # suite_attr_wrapper receives its arguments as the string form of a
    # dict; 'attr_filter' is the only key set here.
    args_dict = {'attr_filter': attr_filter_val}
    options.suite_args = str(args_dict)
    options.name = 'suite_attr_wrapper'
    return options

Allen Li

2016-09-02 11:52:34 -0700

[diff] [blame]

335

class TestResult(object):

    """Loggable outcome of a single TestView."""

    def __init__(self, test_view, retry_count=0):
        """Capture name, status and reason from a test view.

        @param test_view: TestView instance; must provide get_testname()
                          and the 'status' and 'reason' keys.
        @param retry_count: Retry count for test. Optional.
        """
        self.retry_count = retry_count
        self.name = test_view.get_testname()
        self.status = test_view['status']
        self.reason = test_view['reason']


    # Statuses that are not rendered as a failure.
    _PRETTY_STATUS_MAP = {
        'GOOD': '[ PASSED ]',
        'TEST_NA': '[ INFO ]',
    }


    @property
    def _pretty_status(self):
        """Bracketed label for the status; unknown statuses read as failed."""
        try:
            return self._PRETTY_STATUS_MAP[self.status]
        except KeyError:
            return '[ FAILED ]'


    def log_using(self, log_function, name_column_width):
        """Emit this result through a logging-style callable.

        One line with the padded name and status label is always logged;
        a status/reason line follows for anything other than GOOD, and a
        retry line follows when the test was retried.

        @param log_function: Log function to use. Example: logging.info
        @param name_column_width: Width of name column for formatting.
        """
        name_cell = self.name.ljust(name_column_width)
        log_function('%s%s', name_cell, self._pretty_status)

        passed = self.status == 'GOOD'
        if not passed:
            log_function('%s %s: %s', name_cell, self.status, self.reason)

        was_retried = self.retry_count > 0
        if was_retried:
            log_function('%s retry_count: %s', name_cell, self.retry_count)

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

372

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

373

Shuqian Zhao

2015-09-29 14:19:28 -0700

[diff] [blame]

374

def get_original_suite_name(suite_name, suite_args):
    """Recover the real suite name behind a suite_attr_wrapper job.

    For any other suite_name the name is returned untouched and suite_args
    is not looked at.

    @param suite_name: the name of the suite launched in afe. When it is
                       suite_attr_wrapper, the suite that is actually
                       running is specified in the suite_args.
    @param suite_args: string form of a dict whose 'attr_filter' entry
                       holds the attribute expression naming the suite(s).

    @returns: the first 'suite:<name>' found in the attribute filter, or
              suite_name itself when there is none.

    """
    if suite_name != 'suite_attr_wrapper':
        return suite_name

    attr_filter = ast.literal_eval(suite_args).get('attr_filter', '')
    # Tokens are separated by parentheses and spaces; the first one of the
    # form 'suite:<name>' wins.
    for token in re.split('[() ]', attr_filter):
        if token.startswith('suite:'):
            return token[6:]
    return suite_name

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

393

class LogLink(object):
    """A link to be recorded in the logs for one job.

    Depending on what was supplied at construction time, an instance can
    produce a link to the job's log files and, when a bug was filed for a
    failure in the job, a link to that bug as well.

    @var anchor The link text.
    @var url The link url.
    @var bug_id Id of a bug to link to, or None.
    """

    # A list of tests that don't get retried so skip the dashboard.
    _SKIP_RETRY_DASHBOARD = ['provision']

    _BUG_LINK_PREFIX = 'Auto-Bug'
    _LOG_LINK_PREFIX = 'Test-Logs'


    @classmethod
    def get_bug_link(cls, bug_id):
        """Build the URL of the bug with the given id.

        @param bug_id: The id of the bug.
        @return: A link, eg: https://crbug.com/<bug_id>.
        """
        return reporting_utils.link_crbug(bug_id)


    def __init__(self, anchor, server, job_string, bug_info=None, reason=None,
                 retry_count=0, testname=None):
        """Initialize the LogLink by generating the log URL.

        @param anchor      The link text.
        @param server      The hostname of the server this suite ran on.
        @param job_string  The job whose logs we'd like to link to.
        @param bug_info    Info about the bug, if one was filed; unpacked
                           as a (bug_id, bug_count) pair.
        @param reason      A string representing the reason of failure if any.
        @param retry_count How many times the test has been retried.
        @param testname    Optional Arg that supplies the testname.
        """
        self.anchor = anchor
        self.url = _URL_PATTERN % (server, job_string)
        self.reason = reason
        self.retry_count = retry_count
        self.testname = testname
        self.bug_id, self.bug_count = bug_info if bug_info else (None, None)


    @property
    def bug_url(self):
        """URL of associated bug, or None when there is no bug id."""
        return reporting_utils.link_crbug(self.bug_id) if self.bug_id else None


    @property
    def _bug_count_text(self):
        """Return bug count as human friendly text."""
        count = self.bug_count
        if count is None:
            return 'unknown number of reports'
        if count == 1:
            return 'new report'
        return '%s reports' % count


    def GenerateBuildbotLinks(self):
        """Yield links formatted to meet buildbot expectations.

        If there is a bug associated with this link, a link to the bug is
        produced first, followed by a link to the job logs; otherwise only
        the link to the job logs is produced.

        @return A generator of links formatted for the buildbot log
                annotator.
        """
        # Details shared by both anchors: retry count first, then reason.
        log_details = []
        bug_details = []
        if self.retry_count > 0:
            retry_text = 'retry_count: %d' % self.retry_count
            log_details.append(retry_text)
            bug_details.append(retry_text)
        if self.reason:
            bug_details.append(self.reason)
            log_details.append(self.reason)

        # The bug link, when there is one, carries the report count too.
        if self.bug_url:
            bug_details.append(self._bug_count_text)
            bug_anchor = self._format_anchor_text(self._BUG_LINK_PREFIX,
                                                  bug_details)
            yield annotations.StepLink(bug_anchor, self.bug_url)

        log_anchor = self._format_anchor_text(self._LOG_LINK_PREFIX,
                                              log_details)
        yield annotations.StepLink(log_anchor, self.url)


    def _format_anchor_text(self, prefix, info_strings):
        """Compose anchor text from a prefix, the anchor and extra details.

        @param prefix        The prefix of the anchor text.
        @param info_strings  The infos presented in the anchor text.
        @return The anchor text: '[prefix]: anchor', followed by the comma
                separated infos when there are any.
        """
        pieces = ['[{prefix}]: {anchor}'.format(
                prefix=prefix,
                anchor=self.anchor.strip())]
        if info_strings:
            pieces.append(', '.join(info_strings))
        return ': '.join(pieces)


    @property
    def text_link(self):
        """Link to the job's logs, for consumption by a human.

        @return A link formatted for human readability.
        """
        return '%s %s' % (self.anchor, self.url)


    def GenerateWmatrixRetryLink(self):
        """Generate a link to the wmatrix retry dashboard.

        @return A link formatted for the buildbot log annotator, or None
                when there is no testname or the test is one of those
                skipped for the dashboard.
        """
        has_dashboard = (bool(self.testname)
                         and self.testname not in self._SKIP_RETRY_DASHBOARD)
        if not has_dashboard:
            return None
        return annotations.StepLink(
                text='[Flake-Dashboard]: %s' % self.testname,
                url=reporting_utils.link_retry_url(self.testname))

Simran Basi

2015-02-03 15:50:18 -0800

[diff] [blame]

535

536

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

537

class Timings(object):

538

"""Timings for important events during a suite.

539

540

All timestamps are datetime.datetime objects.

541

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

542

@var suite_job_id: the afe job id of the suite job for which

543

we are recording the timing for.

544

@var download_start_time: the time the devserver starts staging

545

the build artifacts. Recorded in create_suite_job.

546

@var payload_end_time: the time when the artifacts only necessary to start

547

installing images onto DUT's are staged.

548

Recorded in create_suite_job.

549

@var artifact_end_time: the remaining artifacts are downloaded after we kick

550

off the reimaging job, at which point we record

551

artifact_end_time. Recorded in dynamic_suite.py.

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

552

@var suite_start_time: the time the suite started.

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

553

@var tests_start_time: the time the first test started running.

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

554

@var tests_end_time: the time the last test finished running.

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

555

"""

beeps

2013-03-22 13:15:49 -0700

[diff] [blame]

556

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

557

def __init__(self, suite_job_id):

558

self.suite_job_id = suite_job_id

559

# Timings related to staging artifacts on devserver.

560

self.download_start_time = None

561

self.payload_end_time = None

562

self.artifact_end_time = None

beeps

2013-03-22 13:15:49 -0700

[diff] [blame]

563

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

564

# The test_start_time, but taken off the view that corresponds to the

565

# suite instead of an individual test.

566

self.suite_start_time = None

beeps

2013-03-22 13:15:49 -0700

[diff] [blame]

567

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

568

# Earliest and Latest tests in the set of TestViews passed to us.

569

self.tests_start_time = None

570

self.tests_end_time = None

571

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

572

Chris Masone

d9f13c5

2012-08-29 10:37:08 -0700

[diff] [blame]

573

def RecordTiming(self, view):
    """Pull the interesting timestamps out of one test view and store them.

    get_detailed_test_views() returns a list of entries describing the
    various parts of a suite run. Any one of those entries can be passed
    here; whatever timestamp info it carries is looked up and recorded.

    When a view has no start (or finish) timestamp, datetime.min (or
    datetime.max) is used in its place.

    @param view: A TestView object.
    """
    raw_start = view['test_started_time']
    start_candidate = (time_utils.time_string_to_datetime(raw_start)
                       if raw_start else datetime.min)
    raw_end = view['test_finished_time']
    end_candidate = (time_utils.time_string_to_datetime(raw_end)
                     if raw_end else datetime.max)

    if view.get_testname() != TestView.SUITE_JOB:
        # An ordinary test view: widen the [first start, last end] window.
        self._UpdateFirstTestStartTime(start_candidate)
        self._UpdateLastTestEndTime(end_candidate)
    else:
        # The suite job's own view only tells us when the suite started.
        self.suite_start_time = start_candidate

    # Staging timestamps are only read from views that belong to the suite
    # job itself and that actually carry job keyvals.
    if view['afe_job_id'] != self.suite_job_id or 'job_keyvals' not in view:
        return

    keyvals = view['job_keyvals']

    def _parse(keyval_name):
        # handle_type_error=True is passed so that an unusable value does
        # not raise here (see the comment in SendResultsToStatsd).
        return time_utils.time_string_to_datetime(
                keyvals.get(keyval_name), handle_type_error=True)

    self.download_start_time = _parse(constants.DOWNLOAD_STARTED_TIME)
    self.payload_end_time = _parse(constants.PAYLOAD_FINISHED_TIME)
    self.artifact_end_time = _parse(constants.ARTIFACT_FINISHED_TIME)

Chris Masone

44e4d6c

2012-08-15 14:25:53 -0700

[diff] [blame]

612

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

613

614

def _UpdateFirstTestStartTime(self, candidate):
    """Keep self.tests_start_time at the earliest start time seen so far.

    The stored value is replaced when nothing has been recorded yet, or
    when candidate is earlier than what is stored.

    @param candidate: a datetime.datetime object.
    """
    current = self.tests_start_time
    if current and not candidate < current:
        # Already have a start time that is at least as early.
        return
    self.tests_start_time = candidate

621

622

623

def _UpdateLastTestEndTime(self, candidate):
    """Keep self.tests_end_time at the latest end time seen so far.

    The stored value is replaced when nothing has been recorded yet, or
    when candidate is later than what is stored.

    @param candidate: a datetime.datetime object.
    """
    current = self.tests_end_time
    if current and not candidate > current:
        # Already have an end time that is at least as late.
        return
    self.tests_end_time = candidate

def __str__(self):
    """Render all recorded timestamps as a multi-line, human readable report.

    @returns: A string with one line per recorded timestamp, preceded by
              a 'Suite timings:' header.
    """
    template = ('\n'
                'Suite timings:\n'
                'Downloads started at %s\n'
                'Payload downloads ended at %s\n'
                'Suite started at %s\n'
                'Artifact downloads ended (at latest) at %s\n'
                'Testing started at %s\n'
                'Testing ended at %s\n')
    # The order here must line up with the placeholders in the template.
    values = (self.download_start_time,
              self.payload_end_time,
              self.suite_start_time,
              self.artifact_end_time,
              self.tests_start_time,
              self.tests_end_time)
    return template % values

646

647

beeps

2013-03-22 13:15:49 -0700

[diff] [blame]

648

def SendResultsToStatsd(self, suite, build, board):
    """
    Sends data to statsd.

    1. Makes a data_key of the form: run_suite.$board.$branch.$suite
        eg: stats/gauges/<hostname>/run_suite/<board>/<branch>/<suite>/
    2. Computes timings for several start and end event pairs.
    3. Sends all timing values to statsd.

    A timing is only sent when both its start and its end timestamp were
    recorded; pairs with a missing endpoint are silently skipped.

    @param suite: scheduled suite that we want to record the results of.
    @param build: the build that this suite ran on.
        eg: 'lumpy-release/R26-3570.0.0'
    @param board: the board that this suite ran on.
    """
    # timedelta.total_seconds(), used below, only exists from python 2.7 on.
    if sys.version_info < (2, 7):
        logging.error('Sending run_suite perf data to statsd requires '
                      'python 2.7 or greater.')
        return

    # Constructs the key used for logging statsd timing data.
    data_key = utils.get_data_key('run_suite', suite, build, board)

    # Since we don't want to try subtracting corrupted datetime values
    # we catch TypeErrors in time_utils.time_string_to_datetime and insert
    # None instead. This means that even if, say,
    # keyvals.get(constants.ARTIFACT_FINISHED_TIME) returns a corrupt
    # value the member artifact_end_time is set to None.
    timings = (
        ('payload_download_time',
         self.download_start_time, self.payload_end_time),
        ('artifact_download_time',
         self.download_start_time, self.artifact_end_time),
        ('suite_run_time',
         self.suite_start_time, self.tests_end_time),
        ('tests_run_time',
         self.tests_start_time, self.tests_end_time),
    )
    for stat_name, start, end in timings:
        if start and end:
            autotest_stats.Timer(data_key).send(
                    stat_name, (end - start).total_seconds())

beeps

2013-03-22 13:15:49 -0700

[diff] [blame]

696

beeps

2013-03-22 13:15:49 -0700

[diff] [blame]

697

Alex Miller

2013-10-30 15:18:57 -0700

[diff] [blame]

698

699

def instance_for_pool(pool_name):
    """
    Look up which server should service a suite for the given pool.

    The answer is read from the 'POOL_INSTANCE_SHARDING' config section,
    falling back to _DEFAULT_AUTOTEST_INSTANCE when the pool has no entry.

    @param pool_name: The pool (without 'pool:') to schedule the suite
                      against.
    @return: The correct host that should be used to service this suite run.
    """
    sharding_section = 'POOL_INSTANCE_SHARDING'
    return CONFIG.get_config_value(sharding_section, pool_name,
                                   default=_DEFAULT_AUTOTEST_INSTANCE)

710

711

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

712

class TestView(object):
    """Represents a test view and provides a set of helper functions.

    Wraps a tko test view dictionary together with the afe job it belongs
    to, and answers questions about it: the name to display, whether it was
    aborted or timed out, whether a buildbot link should be shown, etc.

    @var SUITE_JOB: Display name given to the suite job's SERVER_JOB view.
    @var INFRA_TESTS: Display names of tests that only concern lab infra.
    """

    SUITE_JOB = 'Suite job'
    INFRA_TESTS = ['provision']

    # test_name prefixes of views that describe a job rather than a test.
    _JOB_VIEW_PREFIXES = ('SERVER_JOB', 'CLIENT_JOB')


    def __init__(self, view, afe_job, suite_name, build, user,
                 solo_test_run=False):
        """Init a TestView object representing a tko test view.

        @param view: A dictionary representing a tko test view.
        @param afe_job: An instance of frontend.afe.models.Job
                        representing the job that kicked off the test.
        @param suite_name: The name of the suite
                           that the test belongs to.
        @param build: The build for which the test is run.
        @param user: The user for which the test is run.
        @param solo_test_run: This is a solo test run not part of a suite.
        """
        self.view = view
        self.afe_job = afe_job
        self.suite_name = suite_name
        self.build = build
        # A job without a parent is the suite job, unless we were told this
        # is a stand-alone test run.
        self.is_suite_view = afe_job.parent_job is None and not solo_test_run
        # This is the test name that will be shown in the output. It is
        # computed lazily and cached by get_testname().
        self.testname = None
        self.user = user

        # The case that a job was aborted before it got a chance to run
        # usually indicates suite has timed out (unless aborted by user).
        # In this case, the abort reason will be None.
        # Update the reason with proper information.
        if (self.is_relevant_suite_view() and
                self.get_testname() != self.SUITE_JOB and
                self.view['status'] == 'ABORT' and
                not self.view['reason']):
            self.view['reason'] = 'Timed out, did not run.'


    def __getitem__(self, key):
        """Overload __getitem__ so that we can still use []

        @param key: A key of the tko test view.

        @returns: The value of an attribute in the view.

        """
        return self.view[key]


    def __iter__(self):
        """Overload __iter__ so that it supports 'in' operator."""
        return iter(self.view)


    def _is_job_view(self):
        """Tell whether the raw test_name denotes a SERVER_JOB/CLIENT_JOB.

        @returns: True if view['test_name'] starts with 'SERVER_JOB' or
                  'CLIENT_JOB', False otherwise.
        """
        return self.view['test_name'].startswith(self._JOB_VIEW_PREFIXES)


    def _get_qualified_test_name(self):
        """Return the raw test name, qualified with the job name if needed.

        @returns: '<job_name>_<test_name>' for SERVER_JOB/CLIENT_JOB views,
                  the plain view['test_name'] for everything else.
        """
        if self._is_job_view():
            # Prepend the job name so that SERVER_JOB and CLIENT_JOB views
            # of different jobs can be told apart.
            return '%s_%s' % (self.view['job_name'], self.view['test_name'])
        return self.view['test_name']


    def get_testname(self):
        """Get test name that should be shown in the output.

        Formalize the test_name we got from the test view.

        Remove 'build/suite' prefix if any. And add the 'experimental' prefix
        for experimental tests if their names do not start with it already.
        The result is cached in self.testname.

        If one runs a test in control file via the following code,
           job.runtest('my_Test', tag='tag')
        for most of the cases, view['test_name'] would look like 'my_Test.tag'.
        If this is the case, this method will just return the original
        test name, i.e. 'my_Test.tag'.

        There are four special cases.
        1) A test view is for the suite job's SERVER_JOB.
           In this case, this method will return 'Suite job'.

        2) A test view is of a child job or a solo test run not part of a
           suite, and for a SERVER_JOB or CLIENT_JOB.
           In this case, we will take the job name, remove the build/suite
           prefix from the job name, and follow it with 'SERVER_JOB'
           or 'CLIENT_JOB'. So the names returned by this
           method will look like:
             'experimental_Telemetry Smoothness Measurement_SERVER_JOB'
             'experimental_dummy_Pass_SERVER_JOB'
             'dummy_Fail_SERVER_JOB'

        3) A test view is of a suite job and its status is ABORT.
           In this case, the view['test_name'] is the child job's name.
           If it is an experimental test, 'experimental' will be part
           of the name. For instance,
             'lumpy-release/R35-5712.0.0/perf_v2/
                   experimental_Telemetry Smoothness Measurement'
             'lumpy-release/R35-5712.0.0/dummy/experimental_dummy_Pass'
             'lumpy-release/R35-5712.0.0/dummy/dummy_Fail'
           The above names will be converted to the following:
             'experimental_Telemetry Smoothness Measurement'
             'experimental_dummy_Pass'
             'dummy_Fail'

        4) A test view is of a suite job and its status is TEST_NA.
           In this case, the view['test_name'] is the NAME field of the control
           file. If it is an experimental test, 'experimental' will be part of
           the name. For instance,
             'experimental_Telemetry Smoothness Measurement'
             'experimental_dummy_Pass'
             'dummy_Fail'
           This method will not modify these names.

        @returns: Test name after normalization.

        """
        if self.testname is not None:
            return self.testname

        if (self.is_suite_view and
                self.view['test_name'].startswith('SERVER_JOB')):
            # Rename suite job's SERVER_JOB to 'Suite job'.
            self.testname = self.SUITE_JOB
            return self.testname

        testname = self._get_qualified_test_name()
        experimental = self.is_experimental()
        # Remove the build and suite name from testname if any.
        testname = tools.get_test_name(
                self.build, self.suite_name, testname)
        # If an experimental test was aborted, testname
        # would include the 'experimental' prefix already.
        prefix = constants.EXPERIMENTAL_PREFIX if (
                experimental and not
                testname.startswith(constants.EXPERIMENTAL_PREFIX)) else ''
        self.testname = prefix + testname
        return self.testname


    def is_relevant_suite_view(self):
        """Checks whether this is a suite view we should care about.

        A view is relevant if it is the suite job's own view, or if it
        belongs to the suite job, is not a CLIENT_JOB view and has no subdir.

        @returns: True if it is relevant. False otherwise.
        """
        return (self.get_testname() == self.SUITE_JOB or
                (self.is_suite_view and
                    not self.view['test_name'].startswith('CLIENT_JOB') and
                    not self.view['subdir']))


    def is_test(self):
        """Return whether the view is for an actual test.

        @returns True if the view is for an actual test.
                 False if the view is for SERVER_JOB or CLIENT_JOB.

        """
        return not self._is_job_view()


    def is_retry(self):
        """Check whether the view is for a retry.

        @returns: True, if the view is for a retry; False otherwise.

        """
        return self.view['job_keyvals'].get('retry_original_job_id') is not None


    def is_experimental(self):
        """Check whether a test view is for an experimental test.

        @returns: True if it is for an experimental test, False otherwise.

        """
        return (self.view['job_keyvals'].get('experimental') == 'True' or
                tools.get_test_name(self.build, self.suite_name,
                        self.view['test_name']).startswith('experimental'))


    def hit_timeout(self):
        """Check whether the corresponding job has hit its own timeout.

        Note this method should not be called for those test views
        that belongs to a suite job and are determined as irrelevant
        by is_relevant_suite_view.  This is because they are associated
        to the suite job, whose job start/finished time make no sense
        to an irrelevant test view.

        @returns: True if the corresponding afe job has hit timeout.
                  False otherwise.
        """
        if (self.is_relevant_suite_view() and
                self.get_testname() != self.SUITE_JOB):
            # Any relevant suite test view except SUITE_JOB
            # did not hit its own timeout because it was not ever run.
            return False
        start = (datetime.strptime(
                self.view['job_started_time'], time_utils.TIME_FMT)
                if self.view['job_started_time'] else None)
        end = (datetime.strptime(
                self.view['job_finished_time'], time_utils.TIME_FMT)
                if self.view['job_finished_time'] else None)
        if not start or not end:
            # Without both timestamps the runtime cannot be computed.
            return False
        else:
            return ((end - start).total_seconds()/60.0
                        > self.afe_job.max_runtime_mins)


    def is_aborted(self):
        """Check if the view was aborted.

        For suite job and child job test views, we check job keyval
        'aborted_by' and test status.

        For relevant suite job test views, we only check test status
        because the suite job keyval won't make sense to individual
        test views.

        @returns: True if the test was aborted, False otherwise.

        """

        if (self.is_relevant_suite_view() and
                self.get_testname() != self.SUITE_JOB):
            return self.view['status'] == 'ABORT'
        else:
            return (bool(self.view['job_keyvals'].get('aborted_by')) and
                    self.view['status'] in ['ABORT', 'RUNNING'])


    def is_in_fail_status(self):
        """Check if the given test's status corresponds to a failure.

        @returns: True if the test's status is FAIL, ERROR or ABORT.
                  False otherwise.

        """
        # All the statuses tests can have when they fail.
        return self.view['status'] in ['FAIL', 'ERROR', 'ABORT']


    def is_infra_test(self):
        """Check whether this is a test that only lab infra is concerned.

        @returns: True if only lab infra is concerned, False otherwise.

        """
        return self.get_testname() in self.INFRA_TESTS


    def get_buildbot_link_reason(self):
        """Generate the buildbot link reason for the test.

        @returns: A string representing the reason: '<status>: <reason>'
                  when the view has a reason, just the status otherwise.

        """
        return ('%s: %s' % (self.view['status'], self.view['reason'])
                if self.view['reason'] else self.view['status'])


    def get_job_id_owner_str(self):
        """Generate the job_id_owner string for a test.

        @returns: A string which looks like 135036-username

        """
        return '%s-%s' % (self.view['afe_job_id'], self.user)


    def get_bug_info(self, suite_job_keyvals):
        """Get the bug info from suite_job_keyvals.

        If a bug has been filed for the test, its bug info (bug id and counts)
        will be stored in the suite job's keyvals. This method attempts to
        retrieve bug info of the test from |suite_job_keyvals|. It will return
        None if no bug info is found. No need to check bug info if the view is
        SUITE_JOB.

        @param suite_job_keyvals: The job keyval dictionary of the suite job.
                All the bug info about child jobs are stored in
                suite job's keyvals.

        @returns: None if there is no bug info, or a pair with the
                  id of the bug, and the count of the number of
                  times the bug has been seen.

        """
        if self.get_testname() == self.SUITE_JOB:
            return None
        testname = self._get_qualified_test_name()

        return tools.get_test_failure_bug_info(
                suite_job_keyvals, self.view['afe_job_id'],
                testname)


    def should_display_buildbot_link(self):
        """Check whether a buildbot link should show for this view.

        For suite job view, show buildbot link if it fails.
        For normal test view,
            show buildbot link if it is a retry
            show buildbot link if it hits its own timeout.
            show buildbot link if it fails. This doesn't
            include the case where it was aborted but has
            not hit its own timeout (most likely it was aborted because
            suite has timed out).

        @returns: True if we should show the buildbot link.
                  False otherwise.
        """
        is_bad_status = (self.view['status'] != 'GOOD' and
                         self.view['status'] != 'TEST_NA')
        if self.get_testname() == self.SUITE_JOB:
            return is_bad_status
        if self.is_retry():
            return True
        if is_bad_status:
            return not self.is_aborted() or self.hit_timeout()
        # A healthy, non-retried test: say so explicitly instead of falling
        # off the end of the method and returning None.
        return False


    def get_control_file_attributes(self):
        """Get the attributes from the control file of the test.

        @returns: A list of test attribute or None (when the afe job has
                  no control file).
        """
        control_file = self.afe_job.control_file
        attributes = None
        if control_file:
            cd = control_data.parse_control_string(control_file)
            attributes = list(cd.attributes)
        return attributes

Allen Li

2016-09-14 19:05:47 -0700

[diff] [blame]

1053

def log_buildbot_links(log_func, links):
    """Emit every buildbot link, plus any wmatrix retry link, through a logger.

    For each entry, all the links produced by GenerateBuildbotLinks() are
    logged first, followed by the wmatrix retry link if there is one.

    @param log_func: Logging function to use.
    @param links: Iterable of LogLink instances.
    """
    for entry in links:
        for buildbot_url in entry.GenerateBuildbotLinks():
            log_func(buildbot_url)
        retry_url = entry.GenerateWmatrixRetryLink()
        if not retry_url:
            # Nothing to report for wmatrix on this entry.
            continue
        log_func(retry_url)

1065

1066

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1067

class ResultCollector(object):

Simran Basi

2015-10-14 19:05:00 -0700

[diff] [blame]

1068

"""Collect test results of a suite or a single test run.

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1069

1070

Once a suite job has finished, use this class to collect test results.

1071

`run` is the core method that is to be called first. Then the caller

1072

could retrieve information like return code, return message, is_aborted,

1073

and timings by accessing the collector's public attributes. And output

1074

the test results and links by calling the 'output_*' methods.

1075

1076

Here is an overview of what the `run` method does.

1077

1078

1) Collect the suite job's results from tko_test_view_2.

1079

For the suite job, we only pull test views without a 'subdir'.

1080

A NULL subdir indicates that the test was _not_ executed. This could be

1081

that no child job was scheduled for this test or the child job got

1082

aborted before it started running.

1083

(Note 'SERVER_JOB'/'CLIENT_JOB' are handled specially)

1084

1085

2) Collect the child jobs' results from tko_test_view_2.

1086

For child jobs, we pull all the test views associated with them.

Allen Li

2016-09-14 19:05:47 -0700

[diff] [blame]

1087

(Note 'SERVER_JOB'/'CLIENT_JOB' are handled specially)

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1088

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

1089

3) Generate web and buildbot links.

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1090

4) Compute timings of the suite run.

1091

5) Compute the return code based on test results.

1092

1093

@var _instance_server: The hostname of the server that is used

1094

to service the suite.

1095

@var _afe: The afe rpc client.

1096

@var _tko: The tko rpc client.

1097

@var _build: The build for which the suite is run,

1098

e.g. 'lumpy-release/R35-5712.0.0'

MK Ryu

2014-10-21 11:58:09 -0700

[diff] [blame]

1099

@var _board: The target board for which the suite is run,

1100

e.g., 'lumpy', 'link'.

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1101

@var _suite_name: The suite name, e.g. 'bvt', 'dummy'.

1102

@var _suite_job_id: The job id of the suite for which we are going to

1103

collect results.

Shuqian Zhao

2015-09-29 14:19:28 -0700

[diff] [blame]

1104

@var _original_suite_name: The suite name we record timing would be

1105

different from _suite_name when running

1106

suite_attr_wrapper.

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

1107

@var _suite_views: A list of TestView objects, representing relevant

1108

test views of the suite job.

1109

@var _child_views: A list of TestView objects, representing test views

1110

of the child jobs.

1111

@var _test_views: A list of TestView objects, representing all test views

1112

from _suite_views and _child_views.

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1113

@var _web_links: A list of web links pointing to the results of jobs.

1114

@var _buildbot_links: A list of buildbot links for non-passing tests.

Simran Basi

2015-10-14 19:05:00 -0700

[diff] [blame]

1115

@var _solo_test_run: True if this is a single test run.

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1116

@var return_code: The exit code that should be returned by run_suite.

1117

@var return_message: Any message that should be displayed to explain

1118

the return code.

1119

@var is_aborted: Whether the suite was aborted or not.

1120

True, False or None (aborting status is unknown yet)

1121

@var timings: A Timing object that records the suite's timings.

"""

MK Ryu

2014-10-21 11:58:09 -0700

[diff] [blame]

1126

def __init__(self, instance_server, afe, tko, build, board,
             suite_name, suite_job_id, original_suite_name=None,
             user=None, solo_test_run=False):
    """Set up a collector for one suite job (or one solo test run).

    Nothing is fetched here; all result containers start out empty.

    @param instance_server: The hostname of the server that is used
                            to service the suite.
    @param afe: The afe rpc client.
    @param tko: The tko rpc client.
    @param build: The build for which the suite is run,
                  e.g. 'lumpy-release/R35-5712.0.0'
    @param board: The target board for which the suite is run,
                  e.g., 'lumpy', 'link'.
    @param suite_name: The suite name, e.g. 'bvt', 'dummy'.
    @param suite_job_id: The job id of the suite for which we are going to
                         collect results.
    @param original_suite_name: The suite name to record timing under;
                                falls back to suite_name when not given.
    @param user: The user the tests are run for; falls back to the result
                 of getpass.getuser() when not given.
    @param solo_test_run: True if this is a single test run.
    """
    # What we were asked to collect, and the clients to collect it with.
    self._instance_server = instance_server
    self._afe = afe
    self._tko = tko
    self._build = build
    self._board = board
    self._suite_name = suite_name
    self._suite_job_id = suite_job_id
    if not original_suite_name:
        original_suite_name = suite_name
    self._original_suite_name = original_suite_name
    if not user:
        user = getpass.getuser()
    self._user = user
    self._solo_test_run = solo_test_run

    # Containers that get filled in as results are collected.
    self._suite_views = []
    self._child_views = []
    self._test_views = []
    self._retry_counts = {}
    self._web_links = []
    self._buildbot_links = []
    self._num_child_jobs = 0

    # Public outcome of the collection; unknown until results are in.
    self.return_code = None
    self.return_message = ''
    self.is_aborted = None
    self.timings = None

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1150

1151

Allen Li

2016-09-14 19:05:47 -0700

[diff] [blame]

1152

@property
def buildbot_links(self):
    """Read-only public view of the collected buildbot links.

    @returns: The very list object stored in self._buildbot_links (not a
              copy).
    """
    links = self._buildbot_links
    return links

1156

1157

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1158

def _fetch_relevant_test_views_of_suite(self):
    """Fetch relevant test views of the suite job.

    For the suite job, there will be a test view for SERVER_JOB, and views
    for results of its child jobs. For example, assume we've created
    a suite job (afe_job_id: 40) that runs dummy_Pass, dummy_Fail,
    dummy_Pass.bluetooth. Assume dummy_Pass was aborted before running while
    dummy_Pass.bluetooth got TEST_NA as no duts have bluetooth.
    So the suite job's test views would look like (the first column is the
    test_idx; test name, subdir, afe job id and status follow the second
    column)
    _____________________________________________________________________
    10      | 1000   |SERVER_JOB          |----        |40        |GOOD
    11      | 1000   |dummy_Pass          |NULL        |40        |ABORT
    12      | 1000   |dummy_Fail.Fail     |41-owner/...|40        |FAIL
    13      | 1000   |dummy_Fail.Error    |42-owner/...|40        |ERROR
    14      | 1000   |dummy_Pass.bluetooth|NULL        |40        |TEST_NA

    For a suite job, we only care about
    a) The test view for the suite job's SERVER_JOB
    b) The test views for real tests without a subdir. A NULL subdir
       indicates that a test didn't get executed.
    So, for the above example, we only keep test views whose test_idxs
    are 10, 11, 14.

    @returns: A list of TestView objects, representing relevant
              test views of the suite job.

    """
    suite_job = self._afe.get_jobs(id=self._suite_job_id)[0]
    raw_views = self._tko.run(call='get_detailed_test_views',
                              afe_job_id=self._suite_job_id)
    # Wrap lazily so that each raw view is wrapped and then filtered before
    # the next one is touched.
    wrapped = (TestView(raw, suite_job, self._suite_name, self._build,
                        self._user, solo_test_run=self._solo_test_run)
               for raw in raw_views)
    return [test_view for test_view in wrapped
            if test_view.is_relevant_suite_view()]

1196

1197

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

1198

def _compute_retry_count(self, view):
    """Count how many earlier attempts precede the job behind |view|.

    Follows the chain of 'retry_original_job_id' job keyvals: starting
    from |view|, each original job's first test view is looked up through
    tko until a job without that keyval (or without any test view) is hit.

    @param view: A TestView instance.
    @returns: An int value indicating the retry count.

    """
    retries = 0
    previous_job_id = view['job_keyvals'].get('retry_original_job_id')
    while previous_job_id:
        retries += 1
        previous_views = self._tko.run(
                call='get_detailed_test_views', afe_job_id=previous_job_id)
        if previous_views:
            first_keyvals = previous_views[0]['job_keyvals']
            previous_job_id = first_keyvals.get('retry_original_job_id')
        else:
            # No test view for the original job: the chain ends here.
            previous_job_id = None
    return retries

Simran Basi

2015-10-14 19:05:00 -0700

[diff] [blame]

1216

def _fetch_test_views_of_child_jobs(self, jobs=None):
    """Fetch test views of child jobs.

    @param jobs: Optional list of jobs to collect views from. When it is
                 empty or None, the jobs whose parent is the suite job
                 are queried from the AFE instead.

    @returns: A tuple (child_views, retry_counts)
              child_views is list of TestView objects, representing
              all valid views. retry_counts is a dictionary that maps
              test_idx to retry counts. It only stores retry
              counts that are greater than 0.

    """
    kept_views = []
    retries_by_test_idx = {}
    job_list = jobs or self._afe.get_jobs(
            parent_job_id=self._suite_job_id)
    if job_list:
        self._num_child_jobs = len(job_list)

    for child in job_list:
        raw_views = self._tko.run(
                call='get_detailed_test_views', afe_job_id=child.id,
                invalid=0)
        job_views = [
                TestView(raw, child, self._suite_name, self._build,
                         self._user)
                for raw in raw_views]

        # Does any real test of this job have a non-GOOD status?
        has_failed_test = False
        for candidate in job_views:
            if candidate.is_test() and candidate['status'] != 'GOOD':
                has_failed_test = True
                break

        for candidate in job_views:
            if not candidate.is_test():
                # SERVER_JOB / CLIENT_JOB views are only kept when they
                # failed and no real test failure already explains it.
                if candidate['status'] == 'GOOD' or has_failed_test:
                    continue
            kept_views.append(candidate)
            retries = self._compute_retry_count(candidate)
            if retries > 0:
                retries_by_test_idx[candidate['test_idx']] = retries
    return kept_views, retries_by_test_idx

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1249

1250

1251

def _generate_web_and_buildbot_links(self):
    """Generate web links and buildbot links.

    Rebuilds self._web_links with one LogLink per collected test view,
    and self._buildbot_links with the subset of those links whose view
    reports that a buildbot link should be displayed.

    """
    # TODO(fdeng): If a job was aborted before it reaches Running
    # state, we read the test view from the suite job
    # and thus this method generates a link pointing to the
    # suite job's page for the aborted job. Need a fix.
    self._web_links = []
    self._buildbot_links = []
    # Bug info are stored in the suite job's keyvals. A solo test run has
    # no suite job, and a suite job may have produced no relevant view;
    # in both cases there are no keyvals to consult, so fall back to an
    # empty dict instead of indexing into an empty list.
    if self._solo_test_run or not self._suite_views:
        suite_job_keyvals = {}
    else:
        suite_job_keyvals = self._suite_views[0]['job_keyvals']
    for v in self._test_views:
        retry_count = self._retry_counts.get(v['test_idx'], 0)
        bug_info = v.get_bug_info(suite_job_keyvals)
        job_id_owner = v.get_job_id_owner_str()
        link = LogLink(
                anchor=v.get_testname(),
                server=self._instance_server,
                job_string=job_id_owner,
                bug_info=bug_info, retry_count=retry_count,
                testname=v.get_testname())
        self._web_links.append(link)

        if v.should_display_buildbot_link():
            link.reason = v.get_buildbot_link_reason()
            self._buildbot_links.append(link)

1279

1280

1281

def _record_timings(self):
    """Create the suite's Timings and feed every test view into it."""
    suite_timings = Timings(self._suite_job_id)
    self.timings = suite_timings
    for test_view in self._test_views:
        suite_timings.RecordTiming(test_view)

1286

1287

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

1288

def _get_return_msg(self, code, tests_passed_after_retry):
    """Map a return code to its human-readable explanation.

    @param code: An enum value of RETURN_CODES
    @param tests_passed_after_retry: True/False, indicating
        whether there are test(s) that have passed after retry.
        Only consulted when |code| is RETURN_CODES.WARNING.

    @returns: A string, representing the message. Empty for any code
              that has no dedicated message.

    """
    if code == RETURN_CODES.INFRA_FAILURE:
        return 'Suite job failed or provisioning failed.'
    if code == RETURN_CODES.SUITE_TIMEOUT:
        return ('Some test(s) was aborted before running,'
                ' suite must have timed out.')
    if code == RETURN_CODES.WARNING:
        return ('Some test(s) passed after retry.'
                if tests_passed_after_retry
                else 'Some test(s) raised a warning.')
    if code == RETURN_CODES.ERROR:
        return 'Some test(s) failed.'
    return ''

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1314

def _compute_return_code(self):
    """Compute the exit code based on test results.

    Every non-experimental test view is mapped to a return code and the
    worst one (as decided by get_worse_code) is stored in
    self.return_code; the matching text goes to self.return_message.

    """
    worst_code = RETURN_CODES.OK
    passed_on_retry = False

    for view in self._test_views:
        # Experimental tests are skipped and never affect the exit code.
        if view.is_experimental():
            continue

        # The order of checking each case below is important.
        if view.get_testname() == TestView.SUITE_JOB:
            if view.is_aborted() and view.hit_timeout():
                view_code = RETURN_CODES.SUITE_TIMEOUT
            elif view.is_in_fail_status():
                view_code = RETURN_CODES.INFRA_FAILURE
            elif view['status'] == 'WARN':
                view_code = RETURN_CODES.WARNING
            else:
                view_code = RETURN_CODES.OK
        elif view.is_aborted() and view.is_relevant_suite_view():
            # The test was aborted before it started.
            # This guarantees that the suite has timed out.
            view_code = RETURN_CODES.SUITE_TIMEOUT
        elif view.is_aborted() and not view.hit_timeout():
            # The test was aborted, but not due to a timeout. This is
            # most likely because the suite has timed out, but may also
            # be because it was aborted by the user. Since suite timing
            # out is determined by checking the suite job view, we
            # simply ignore this view here.
            view_code = RETURN_CODES.OK
        elif view.is_in_fail_status():
            # The test job failed.
            view_code = (RETURN_CODES.INFRA_FAILURE
                         if view.is_infra_test()
                         else RETURN_CODES.ERROR)
        elif view['status'] == 'WARN':
            # The test/suite job raised a warning.
            view_code = RETURN_CODES.WARNING
        elif view.is_retry():
            # The test is a passing retry.
            view_code = RETURN_CODES.WARNING
            passed_on_retry = True
        else:
            view_code = RETURN_CODES.OK

        worst_code = get_worse_code(worst_code, view_code)

    self.return_code = worst_code
    self.return_message = self._get_return_msg(
            worst_code, passed_on_retry)

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1365

1366

Allen Li

2016-09-02 11:52:34 -0700

[diff] [blame]

1367

def _make_test_results(self):
    """Wrap every collected test view in a TestResult.

    Each TestResult also receives the retry count recorded for the
    view's test_idx (0 when none was recorded).

    @returns: List of TestResult instances.
    """
    return [
        TestResult(
            test_view=view,
            retry_count=self._retry_counts.get(view['test_idx'], 0))
        for view in self._test_views
    ]

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1381

def output_results(self):
    """Output test results, timings and web links.

    Everything is written through logging.info. When no test result was
    collected, the per-test section is skipped and only the timings and
    the (possibly empty) link list are logged.

    """
    # Output test results
    test_results = self._make_test_results()
    # max() raises ValueError on an empty sequence, so only compute the
    # column width when there is at least one result to print.
    if test_results:
        max_name_length = max(len(test_result.name)
                              for test_result in test_results)
        for test_result in test_results:
            test_result.log_using(logging.info, max_name_length + 3)
    # Output suite timings
    logging.info(self.timings)
    # Output links to test logs
    logging.info('\nLinks to test logs:')
    for link in self._web_links:
        logging.info(link.text_link)
    logging.info('\n')

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1396

1397

Shuqian Zhao

2015-08-05 22:56:30 -0700

[diff] [blame]

1398

def get_results_dict(self):
    """Write test results, timings and web links into a dict.

    @returns: A dict of results in the format like:
              {
              'tests': {
                  'test_1': {'status': 'PASSED', 'attributes': [1,2], ...}
                  'test_2': {'status': 'FAILED', 'attributes': [1],...}
              }
              'suite_timings': {
                  'download_start': '1998-07-17 00:00:00',
                  'payload_download_end': '1998-07-17 00:00:05',
                  ...
              }
              'suite_job_id': <id of the suite job>
              }
              'suite_timings' is only present when self.timings is set.
    """
    tests = {}
    results = {'tests': tests}
    for view in self._test_views:
        entry = tests.setdefault(view.get_testname(), {})
        entry.update({
            'status': view['status'],
            'attributes': view.get_control_file_attributes() or list(),
            'reason': view['reason'],
            'retry_count': self._retry_counts.get(view['test_idx'], 0),
        })

    # Attach the log links to the per-test entries. A link that is also
    # a buildbot link additionally gets a wmatrix link.
    for link in self._web_links:
        entry = tests.get(link.anchor.strip())
        if not entry:
            continue
        entry['link_to_logs'] = link.url
        # Write the wmatrix link into the dict.
        if link in self._buildbot_links and link.testname:
            entry['wmatrix_link'] = reporting_utils.link_retry_url(
                    link.testname)
        # Write the bug url into the dict.
        if link.bug_id:
            entry['bug_url'] = link.bug_url

    # Suite timings are stringified so the dict stays serializable.
    suite_timings = self.timings
    if suite_timings is not None:
        results['suite_timings'] = {
            'download_start' : str(suite_timings.download_start_time),
            'payload_download_end' : str(suite_timings.payload_end_time),
            'suite_start' : str(suite_timings.suite_start_time),
            'artifact_download_end' : str(suite_timings.artifact_end_time),
            'tests_start' : str(suite_timings.tests_start_time),
            'tests_end' : str(suite_timings.tests_end_time),
        }

    results['suite_job_id'] = self._suite_job_id
    return results

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1459

def run(self):
    """Collect test results.

    This method goes through the following steps:
        Fetch relevant test views of the suite job.
        Fetch test views of child jobs
        Check whether the suite was aborted.
        Generate links.
        Calculate suite timings.
        Compute return code based on the test result.

    When no test view is found at all, return_code is set to
    INFRA_FAILURE and the remaining steps are skipped.

    """
    if self._solo_test_run:
        solo_jobs = self._afe.get_jobs(id=self._suite_job_id)
        # Store the retry counts under the same attribute as the suite
        # branch below; every reader looks them up in self._retry_counts.
        self._test_views, self._retry_counts = (
                self._fetch_test_views_of_child_jobs(jobs=solo_jobs))
        # Kept as an alias for any caller that still reads the attribute
        # name this branch used to assign.
        self.retry_count = self._retry_counts
    else:
        self._suite_views = self._fetch_relevant_test_views_of_suite()
        self._child_views, self._retry_counts = (
                self._fetch_test_views_of_child_jobs())
        self._test_views = self._suite_views + self._child_views
    # For hostless job in Starting status, there is no test view associated.
    # This can happen when a suite job in Starting status is aborted. When
    # the scheduler hits some limit, e.g., max_hostless_jobs_per_drone,
    # max_jobs_started_per_cycle, a suite job can stay in Starting status.
    if not self._test_views:
        self.return_code = RETURN_CODES.INFRA_FAILURE
        self.return_message = 'No test view was found.'
        return
    # The suite counts as aborted when any suite view carries an
    # 'aborted_by' job keyval.
    self.is_aborted = any([view['job_keyvals'].get('aborted_by')
                           for view in self._suite_views])
    self._generate_web_and_buildbot_links()
    self._record_timings()
    self._compute_return_code()

1493

1494

MK Ryu

2014-10-21 11:58:09 -0700

[diff] [blame]

1495

def gather_timing_stats(self):
    """Collect timing related statistics.

    Sends the recorded timings to statsd, then records the suite runtime
    (tests end minus suite start, in seconds) in the metadata db.

    """
    suite_timings = self.timings
    # Send timings to statsd.
    suite_timings.SendResultsToStatsd(
            self._original_suite_name, self._build, self._board)

    # Record suite runtime in metadata db.
    # Some failure modes can leave times unassigned, report the sentinel
    # value -1 in that case.
    tests_end = suite_timings.tests_end_time
    suite_start = suite_timings.suite_start_time
    if tests_end is None or suite_start is None:
        runtime_in_secs = -1
    else:
        runtime_in_secs = (tests_end - suite_start).total_seconds()

    job_overhead.record_suite_runtime(
            self._suite_job_id, self._suite_name, self._board,
            self._build, self._num_child_jobs, runtime_in_secs)

1512

1513

Allen Li

2016-12-08 13:51:31 -0800

[diff] [blame]

1514

def _make_builds_from_options(options):
    """Create a dict of builds for creating a suite job.

    The returned dict maps version label prefixes to build names. Together,
    each key-value pair describes a complete label. Only the builds that
    are actually set on |options| (build, firmware_rw_build,
    firmware_ro_build) produce an entry.

    @param options: SimpleNamespace from argument parsing.

    @return: dict mapping version label prefixes to build names
    """
    label_to_build = {}

    os_build = options.build
    if os_build:
        label_prefix = provision.get_version_label_prefix(os_build)
        label_to_build[label_prefix] = os_build

    rw_build = options.firmware_rw_build
    if rw_build:
        label_to_build[provision.FW_RW_VERSION_PREFIX] = rw_build

    ro_build = options.firmware_ro_build
    if ro_build:
        label_to_build[provision.FW_RO_VERSION_PREFIX] = ro_build

    return label_to_build

@retry.retry(error.StageControlFileFailure, timeout_min=10)
def create_suite(afe, options):
    """Create a suite with retries.

    Logs the submission time, then issues the 'create_suite_job' rpc with
    arguments taken from |options|. The delay requested through
    options.delay_minutes is added on top of both timeouts.

    @param afe: The afe object to insert the new suite job into.
    @param options: The options to use in creating the suite.

    @return: The afe_job_id of the new suite job.
    """
    logging.info('%s Submitted create_suite_job rpc',
                 diagnosis_utils.JobTimer.format_time(datetime.now()))
    rpc_kwargs = {
        'name': options.name,
        'board': options.board,
        'builds': _make_builds_from_options(options),
        'test_source_build': options.test_source_build,
        'check_hosts': not options.no_wait,
        'pool': options.pool,
        'num': options.num,
        'file_bugs': options.file_bugs,
        'priority': options.priority,
        'suite_args': options.suite_args,
        'wait_for_results': not options.no_wait,
        'timeout_mins': options.timeout_mins + options.delay_minutes,
        'max_runtime_mins': (options.max_runtime_mins +
                             options.delay_minutes),
        'job_retry': options.retry,
        'max_retries': options.max_retries,
        'suite_min_duts': options.suite_min_duts,
        'offload_failures_only': options.offload_failures_only,
        'run_prod_code': options.run_prod_code,
        'delay_minutes': options.delay_minutes,
    }
    return afe.run('create_suite_job', **rpc_kwargs)

Prashanth B

2014-05-08 18:01:27 -0700

[diff] [blame]

1568

1569

Allen Li

2016-08-16 14:19:08 -0700

[diff] [blame]

1570

# Outcome of handling a suite job: the return code paired with the dict of
# output data (the _handle_job_* helpers return this type).
SuiteResult = namedtuple('SuiteResult', ['return_code', 'output_dict'])

1571

1572

Shuqian Zhao

2015-08-05 22:56:30 -0700

[diff] [blame]

1573

def main_without_exception_handling(options):
    """
    run_suite script without exception handling.

    Sets up logging, picks the autotest instance, then either attaches to
    an existing job (options.mock_job_id) or creates a new suite job, and
    finally hands the job over to _handle_job_nowait/_handle_job_wait.

    @param options: The parsed options.

    @returns: A tuple contains the return_code of run_suite and the dictionary
              of the output.

    @raises utils.TestLabException: if options.mock_job_id names a job
            that cannot be retrieved.
    @raises diagnosis_utils.NotEnoughDutsError: re-raised after a pool
            health bug has been reported for it.

    """
    # If indicate to use the new style suite control file, convert the args
    if options.use_suite_attr:
        options = change_options_for_suite_attr(options)

    log_name = 'run_suite-default.log'
    if options.build:
        # convert build name from containing / to containing only _
        log_name = 'run_suite-%s.log' % options.build.replace('/', '_')
        log_dir = os.path.join(common.autotest_dir, 'logs')
        if os.path.exists(log_dir):
            log_name = os.path.join(log_dir, log_name)

    utils.setup_logging(logfile=log_name)

    if not options.bypass_labstatus and not options.web:
        utils.check_lab_status(options.build)
    instance_server = (options.web if options.web else
                       instance_for_pool(options.pool))
    afe = frontend_wrappers.RetryingAFE(server=instance_server,
                                        timeout_min=options.afe_timeout_mins,
                                        delay_sec=options.delay_sec)
    logging.info('Autotest instance: %s', instance_server)

    rpc_helper = diagnosis_utils.RPCHelper(afe)
    is_real_time = True
    if options.mock_job_id:
        job_id = int(options.mock_job_id)
        # A job that is already finished is replayed, not followed live.
        existing_job = afe.get_jobs(id=job_id, finished=True)
        if existing_job:
            is_real_time = False
        else:
            existing_job = afe.get_jobs(id=job_id)

        if existing_job:
            job_created_on = time_utils.date_string_to_epoch_time(
                    existing_job[0].created_on)
        else:
            raise utils.TestLabException('Failed to retrieve job: %d' % job_id)
    else:
        try:
            rpc_helper.check_dut_availability(options.board, options.pool,
                                              options.minimum_duts,
                                              options.skip_duts_check)
            job_id = create_suite(afe, options)
            job_created_on = time.time()
        except diagnosis_utils.NotEnoughDutsError as e:
            e.add_suite_name(options.name)
            e.add_build(options.test_source_build)
            pool_health_bug = reporting.PoolHealthBug(e)
            bug_id = reporting.Reporter().report(pool_health_bug).bug_id
            if bug_id is not None:
                logging.info(annotations.StepLink(
                    text='Pool Health Bug',
                    url=reporting_utils.link_crbug(bug_id)))
                e.add_bug_id(bug_id)
            # Bare raise re-raises the same (now annotated) exception
            # object while keeping its original traceback.
            raise
        except (error.CrosDynamicSuiteException,
                error.RPCException, proxy.JSONRPCException) as e:
            logging.exception('Error Message: %s', e)
            return (RETURN_CODES.INFRA_FAILURE, {'return_message': str(e)})
        except AttributeError:
            return (RETURN_CODES.INVALID_OPTIONS, {})

    job_timer = diagnosis_utils.JobTimer(
            job_created_on, float(options.timeout_mins))
    job_url = reporting_utils.link_job(job_id,
                                       instance_server=instance_server)
    logging.info('%s Created suite job: %s',
                 job_timer.format_time(job_timer.job_created_time),
                 job_url)
    logging.info(annotations.StepLink(
        text='Link to suite',
        url=job_url))

    if options.create_and_return:
        msg = '--create_and_return was specified, terminating now.'
        logging.info(msg)
        return (RETURN_CODES.OK, {'return_message':msg})

    if options.no_wait:
        return _handle_job_nowait(job_id, options, instance_server)
    else:
        return _handle_job_wait(afe, job_id, options, job_timer, is_real_time)

Allen Li

2016-08-16 14:19:08 -0700

[diff] [blame]

1665

1666

1667

def _handle_job_wait(afe, job_id, options, job_timer, is_real_time):
    """Handle suite job synchronously.

    Polls the AFE every 10 seconds until the job is finished, collects the
    results through a ResultCollector, and derives the final return code
    and output dict from them.

    @param afe AFE instance.
    @param job_id Suite job id.
    @param options Parsed options.
    @param job_timer JobTimer for suite job.
    @param is_real_time Whether or not to handle job timeout.

    @return SuiteResult of suite job.
    """
    code = RETURN_CODES.OK
    output_dict = {}
    rpc_helper = diagnosis_utils.RPCHelper(afe)
    instance_server = afe.server
    while not afe.get_jobs(id=job_id, finished=True):
        # Note that this call logs output, preventing buildbot's
        # 9000 second silent timeout from kicking in. Let there be no
        # doubt, this is a hack. The timeout is from upstream buildbot and
        # this is the easiest work around.
        if job_timer.first_past_halftime():
            rpc_helper.diagnose_job(job_id, instance_server)
        if job_timer.debug_output_timer.poll():
            logging.info('The suite job has another %s till timeout.',
                         job_timer.timeout_hours - job_timer.elapsed_time())
        time.sleep(10)
    logging.info('%s Suite job is finished.',
                 diagnosis_utils.JobTimer.format_time(datetime.now()))
    # For most cases, ResultCollector should be able to determine whether
    # a suite has timed out by checking information in the test view.
    # However, occasionally tko parser may fail on parsing the
    # job_finished time from the job's keyval file. So we add another
    # layer of timeout check in run_suite. We do the check right after
    # the suite finishes to make it as accurate as possible.
    # There is a minor race condition here where we might have aborted
    # for some reason other than a timeout, and the job_timer thinks
    # it's a timeout because of the jitter in waiting for results.
    # The consequence would be that run_suite exits with code
    # SUITE_TIMEOUT while it should have returned INFRA_FAILURE
    # instead, which should happen very rarely.
    # Note the timeout will have no sense when using -m option.
    is_suite_timeout = job_timer.is_suite_timeout()

    # Extract the original suite name to record timing.
    original_suite_name = get_original_suite_name(options.name,
                                                  options.suite_args)
    # Start collecting test results.
    logging.info('%s Start collectint test results and dump them to json.',
                 diagnosis_utils.JobTimer.format_time(datetime.now()))
    TKO = frontend_wrappers.RetryingTKO(server=instance_server,
                                        timeout_min=options.afe_timeout_mins,
                                        delay_sec=options.delay_sec)
    collector = ResultCollector(instance_server=instance_server,
                                afe=afe, tko=TKO, build=options.build,
                                board=options.board,
                                suite_name=options.name,
                                suite_job_id=job_id,
                                original_suite_name=original_suite_name)
    collector.run()
    # Dump test outputs into json.
    output_dict = collector.get_results_dict()
    output_dict['autotest_instance'] = instance_server
    # Human-readable results are only logged when not dumping json.
    if not options.json_dump:
        collector.output_results()
    code = collector.return_code
    return_message = collector.return_message
    if is_real_time:
        # Do not record stats if the suite was aborted (either by a user
        # or through the golo rpc).
        # Also do not record stats if is_aborted is None, indicating
        # aborting status is unknown yet.
        if collector.is_aborted == False:
            logging.info('%s Gathering timing stats for the suite job.',
                         diagnosis_utils.JobTimer.format_time(datetime.now()))
            collector.gather_timing_stats()

        if collector.is_aborted == True and is_suite_timeout:
            # There are two possible cases when a suite times out.
            # 1. the suite job was aborted due to timing out
            # 2. the suite job succeeded, but some child jobs
            #    were already aborted before the suite job exited.
            # The case 2 was handled by ResultCollector,
            # here we handle case 1.
            old_code = code
            code = get_worse_code(
                    code, RETURN_CODES.SUITE_TIMEOUT)
            if old_code != code:
                return_message = 'Suite job timed out.'
                logging.info('Upgrade return code from %s to %s '
                             'because suite job has timed out.',
                             RETURN_CODES.get_string(old_code),
                             RETURN_CODES.get_string(code))

        # NOTE(review): this copy of the file lost its indentation; the
        # pool-info section below is presumed to belong to the
        # `if is_real_time:` branch -- confirm against the repository.
        logging.info('\n %s Attempting to display pool info: %s',
                     diagnosis_utils.JobTimer.format_time(datetime.now()),
                     options.pool)
        try:
            # Add some jitter to make up for any latency in
            # aborting the suite or checking for results.
            cutoff = (job_timer.timeout_hours +
                      timedelta(hours=0.3))
            rpc_helper.diagnose_pool(
                    options.board, options.pool, cutoff)
        except proxy.JSONRPCException:
            # Pool diagnosis is best effort; a failing rpc only warns.
            logging.warning('Unable to display pool info.')

    # And output return message.
    if return_message:
        logging.info('Reason: %s', return_message)
        output_dict['return_message'] = return_message

    logging.info('\n %s Output below this line is for buildbot consumption:',
                 diagnosis_utils.JobTimer.format_time(datetime.now()))
    log_buildbot_links(logging.info, collector._buildbot_links)
    return SuiteResult(code, output_dict)

Prashanth B

2014-03-14 12:36:29 -0700

[diff] [blame]

1782

Allen Li

2016-08-16 14:19:08 -0700

[diff] [blame]

1783

1784

def _handle_job_nowait(job_id, options, instance_server):
    """Report a newly created suite job and return without waiting on it.

    Logs the job id and the buildbot links generated for the suite job,
    then returns an OK result carrying the --no_wait notice.

    @param job_id Suite job id.
    @param options Parsed options.
    @param instance_server Autotest instance hostname.

    @return SuiteResult of suite job.
    """
    exit_message = '--no_wait specified; Exiting.'

    logging.info('Created suite job: %r', job_id)

    # The link is keyed on "<job id>-<current user>".
    job_tag = '%s-%s' % (job_id, getpass.getuser())
    suite_link = LogLink(options.name, instance_server, job_tag)
    for buildbot_line in suite_link.GenerateBuildbotLinks():
        logging.info(buildbot_line)

    logging.info(exit_message)
    return SuiteResult(RETURN_CODES.OK, {'return_message': exit_message})

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

1801

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1802

Fang Deng

2014-09-18 17:52:06 -0700

[diff] [blame]

1803

def main():
    """Entry point.

    Parses the command line, runs the suite and converts any exception into
    a return code plus a 'return_message' entry in the output dictionary.
    When options.json_dump is set, logging is silenced and the output
    dictionary is written to stdout as JSON wrapped between '#JSON_START#'
    and '#JSON_END#' markers.

    Exception mapping:
      * diagnosis_utils.BoardNotAvailableError -> BOARD_NOT_AVAILABLE
      * utils.TestLabException                 -> INFRA_FAILURE
      * any other Exception                    -> INFRA_FAILURE

    @return The RETURN_CODES value describing the outcome of the run.
    """
    utils.verify_not_root_user()

    parser = make_parser()
    options = parser.parse_args()
    try:
        # Silence the log when dumping outputs into json, so that stdout
        # carries only the marked JSON payload written below.
        if options.json_dump:
            logging.disable(logging.CRITICAL)

        if not verify_options(options):
            parser.print_help()
            code = RETURN_CODES.INVALID_OPTIONS
            # 'return_code' is filled in for every path after the try block.
            output_dict = {}
        else:
            code, output_dict = main_without_exception_handling(options)
    except diagnosis_utils.BoardNotAvailableError as e:
        # Format the exception itself rather than e.message: the 'message'
        # attribute is deprecated since Python 2.6 and missing in Python 3.
        # This also matches the other handlers below.
        output_dict = {'return_message': 'Skipping testing: %s' % e}
        code = RETURN_CODES.BOARD_NOT_AVAILABLE
        logging.info(output_dict['return_message'])
    except utils.TestLabException as e:
        output_dict = {'return_message': 'TestLabException: %s' % e}
        code = RETURN_CODES.INFRA_FAILURE
        logging.exception(output_dict['return_message'])
    except Exception as e:
        # Top-level boundary: report anything unexpected as an infra failure
        # instead of letting the traceback escape.
        output_dict = {
            'return_message': 'Unhandled run_suite exception: %s' % e
        }
        code = RETURN_CODES.INFRA_FAILURE
        logging.exception(output_dict['return_message'])

    # Dump test outputs into json.
    output_dict['return_code'] = code
    if options.json_dump:
        output_json = json.dumps(output_dict, sort_keys=True)
        output_json_marked = '#JSON_START#%s#JSON_END#' % output_json.strip()
        sys.stdout.write(output_json_marked)

    status_name = RETURN_CODES.get_string(code)
    logging.info('Will return from run_suite with status: %s', status_name)
    autotest_stats.Counter('run_suite.%s' % status_name).increment()
    return code

Fang Deng

2014-09-18 17:52:06 -0700

[diff] [blame]

1847

1848

Chris Masone