Blame - site_utils/run_suite.py - platform/external/autotest

2012-02-14 14:18:01 -0800

[diff] [blame]

#!/usr/bin/python

#

# Use of this source code is governed by a BSD-style license that can be

5

# found in the LICENSE file.

6

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

7

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

8

"""Tool for running suites of tests and waiting for completion.

9

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

10

The desired test suite will be scheduled with autotest. By default,

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

11

this tool will block until the job is complete, printing a summary

12

at the end. Error conditions result in exceptions.

13

14

This is intended for use only with Chrome OS test suites that leverage the

15

dynamic suite infrastructure in server/cros/dynamic_suite.py.

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

16

17

This script exits with one of the following codes:

18

0 - OK: Suite finished successfully

19

1 - ERROR: Test(s) failed, or hit their own timeout

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

20

2 - WARNING: Test(s) raised a warning or passed on retry, none failed/timed out.

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

21

3 - INFRA_FAILURE: Infrastructure related issues, e.g.

22

* Lab is down

23

* Too many duts (defined as a constant) in repair failed status

24

* Suite job issues, like bug in dynamic suite,

25

user aborted the suite, lose a drone/all devservers/rpc server,

26

0 tests ran, etc.

Fang Deng

2014-09-12 14:16:11 -0700

[diff] [blame]

27

* provision failed

28

TODO(fdeng): crbug.com/413918, reexamine treating all provision

29

failures as INFRA failures.

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

30

4 - SUITE_TIMEOUT: Suite timed out, some tests ran,

31

none failed by the time the suite job was aborted. This will cover,

32

but not limited to, the following cases:

33

* A devserver failure that manifests as a timeout

34

* No DUTs available midway through a suite

35

* Provision/Reset/Cleanup took longer time than expected for new image

36

* A regression in scheduler tick time.

Fang Deng

2014-09-25 10:18:48 -0700

[diff] [blame]

37

5 - BOARD_NOT_AVAILABLE: If there is no host for the requested board/pool.

38

6 - INVALID_OPTIONS: If options are not valid.

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

39

"""

40

Allen Li

2016-09-14 14:44:59 -0700

[diff] [blame]

41

import argparse

42

import ast

Allen Li

2016-08-16 14:19:08 -0700

[diff] [blame]

43

from collections import namedtuple

Chris Masone

2012-09-06 16:00:07 -0700

[diff] [blame]

44

from datetime import datetime

Allen Li

2016-09-14 14:44:59 -0700

[diff] [blame]

45

from datetime import timedelta

import getpass

import json

import logging

import os

import re

import sys

import time

Chris Masone

2012-09-06 16:00:07 -0700

[diff] [blame]

53

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

54

import common

Allen Li

2016-09-14 15:19:20 -0700

[diff] [blame]

55

from chromite.lib import buildbot_annotations as annotations

56

Shuqian Zhao

2015-08-05 22:56:30 -0700

[diff] [blame]

57

from autotest_lib.client.common_lib import control_data

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

58

from autotest_lib.client.common_lib import error

J. Richard Barnette

3cbd76b

2013-11-27 12:11:25 -0800

[diff] [blame]

59

from autotest_lib.client.common_lib import global_config, enum

60

from autotest_lib.client.common_lib import priorities

Dan Shi

2014-08-10 23:38:40 -0700

[diff] [blame]

61

from autotest_lib.client.common_lib import time_utils

Gabe Black

2015-02-04 23:55:15 -0800

[diff] [blame]

62

from autotest_lib.client.common_lib.cros.graphite import autotest_stats

Prashanth B

2014-05-08 18:01:27 -0700

[diff] [blame]

63

from autotest_lib.client.common_lib.cros import retry

Prashanth B

2014-03-14 12:36:29 -0700

[diff] [blame]

64

from autotest_lib.frontend.afe.json_rpc import proxy

J. Richard Barnette

3cbd76b

2013-11-27 12:11:25 -0800

[diff] [blame]

65

from autotest_lib.server import utils

Dan Shi

2014-10-10 13:38:51 -0700

[diff] [blame]

66

from autotest_lib.server.cros import provision

Chris Masone

44e4d6c

2012-08-15 14:25:53 -0700

[diff] [blame]

67

from autotest_lib.server.cros.dynamic_suite import constants

Chris Masone

b493555

2012-08-14 12:05:54 -0700

[diff] [blame]

68

from autotest_lib.server.cros.dynamic_suite import frontend_wrappers

Allen Li

2016-08-18 12:09:32 -0700

[diff] [blame]

69

from autotest_lib.server.cros.dynamic_suite import reporting

Prashanth B

2014-03-14 12:36:29 -0700

[diff] [blame]

70

from autotest_lib.server.cros.dynamic_suite import reporting_utils

J. Richard Barnette

e7b98bb

2013-08-21 16:34:16 -0700

[diff] [blame]

71

from autotest_lib.server.cros.dynamic_suite import tools

Prashanth B

2014-03-14 12:36:29 -0700

[diff] [blame]

72

from autotest_lib.site_utils import diagnosis_utils

MK Ryu

2014-10-21 11:58:09 -0700

[diff] [blame]

73

from autotest_lib.site_utils import job_overhead

74

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

75

Chris Masone

1120cdf

2012-02-27 17:35:07 -0800

[diff] [blame]

76

# Handle on the global autotest configuration object.
CONFIG = global_config.global_config

# Value of 'hostname' in the SERVER config section.
_DEFAULT_AUTOTEST_INSTANCE = CONFIG.get_config_value(
        'SERVER', 'hostname', type=str)
# Value of 'log_url_pattern' in the CROS config section. LogLink formats it
# with a (server, job_string) tuple.
_URL_PATTERN = CONFIG.get_config_value('CROS', 'log_url_pattern', type=str)

# Return code that will be sent back to autotest_rpc_server.py
RETURN_CODES = enum.Enum(
        'OK', 'ERROR', 'WARNING', 'INFRA_FAILURE', 'SUITE_TIMEOUT',
        'BOARD_NOT_AVAILABLE', 'INVALID_OPTIONS')
# The severity of return code. If multiple codes
# apply, the script should always return the severest one.
# E.g. if we have a test failure and the suite also timed out,
# we should return 'ERROR'.
# NOTE: BOARD_NOT_AVAILABLE and INVALID_OPTIONS have no entry in this map,
# so looking either of them up here raises KeyError.
SEVERITY = {RETURN_CODES.OK: 0,
            RETURN_CODES.WARNING: 1,
            RETURN_CODES.SUITE_TIMEOUT: 2,
            RETURN_CODES.INFRA_FAILURE: 3,
            RETURN_CODES.ERROR: 4}
# Three '/'-separated parts; the last is a number (optionally prefixed with
# 'P') or the literal LATEST.
ANDROID_BUILD_REGEX = r'.+/.+/P?([0-9]+|LATEST)'
# Same as above, followed by either ',' or '#<number>'.
ANDROID_TESTBED_BUILD_REGEX = ANDROID_BUILD_REGEX + '(,|(#[0-9]+))'

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

97

98

99

def get_worse_code(code1, code2):
    """Pick whichever of two return codes ranks higher in SEVERITY.

    When both codes have the same severity, code1 is returned.

    @param code1: An enum value of RETURN_CODES
    @param code2: An enum value of RETURN_CODES

    @returns: the more severe one between code1 and code2.

    """
    if SEVERITY[code1] >= SEVERITY[code2]:
        return code1
    return code2

Simran Basi

22aa9fe

2012-12-07 16:37:09 -0800

[diff] [blame]

109

Chris Masone

dfa0beba

2012-03-19 11:41:47 -0700

[diff] [blame]

110

Allen Li

2016-09-14 14:44:59 -0700

[diff] [blame]

111

def bool_str(x):
    """Argparse `type` callable accepting only the literals True and False.

    @param x: string representation of boolean value.

    @returns: True for 'True', False for 'False'.

    @raises argparse.ArgumentTypeError: if x is neither literal.

    """
    for literal, value in (('True', True), ('False', False)):
        if x == literal:
            return value
    raise argparse.ArgumentTypeError(
            '%s is not one of True or False' % (x,))

124

125

Allen Li

603728a

2016-12-08 13:58:11 -0800

[diff] [blame^]

126

def _get_priority_value(x):
    """Coerce a priority given as a number or as a name into an int.

    A priority may be written as an int (possibly in string form) or as
    the name of a priority level; either spelling is turned into the
    corresponding int value.

    Meant to be used as the `type` callable of a command line argument.

    @param x: priority value as an int, int string, or name string

    @returns: int value of priority

    @raises argparse.ArgumentTypeError: if x is neither an int nor a name
            that priorities.Priority recognizes.

    """
    try:
        return int(x)
    except ValueError:
        # Not numeric; fall through and treat x as a priority name.
        pass
    try:
        return priorities.Priority.get_value(x)
    except AttributeError:
        known_names = ', '.join(priorities.Priority.names)
        raise argparse.ArgumentTypeError(
                'Unknown priority level %s.  Try one of %s.'
                % (x, known_names))

148

149

Allen Li

2016-09-14 14:44:59 -0700

[diff] [blame]

150

def make_parser():
    """Make ArgumentParser instance for run_suite.py.

    Several boolean options take an explicit "True"/"False" value (parsed
    with bool_str) instead of being plain flags; see the comment on
    --no_wait below for the reason.

    @returns: a configured argparse.ArgumentParser.

    """
    parser = argparse.ArgumentParser(
            usage="%(prog)s [options]")
    parser.add_argument("-b", "--board", dest="board")
    parser.add_argument("-i", "--build", dest="build")
    parser.add_argument(
            "-w", "--web", dest="web", default=None,
            help="Address of a webserver to receive suite requests.")
    parser.add_argument(
            '--firmware_rw_build', dest='firmware_rw_build', default=None,
            help='Firmware build to be installed in dut RW firmware.')
    parser.add_argument(
            '--firmware_ro_build', dest='firmware_ro_build', default=None,
            help='Firmware build to be installed in dut RO firmware.')
    parser.add_argument(
            '--test_source_build', dest='test_source_build', default=None,
            help=('Build that contains the test code, '
                  'e.g., it can be the value of `--build`, '
                  '`--firmware_rw_build` or `--firmware_ro_build` '
                  'arguments. Default is None, that is, use the test '
                  'code from `--build` (CrOS image)'))
    # This should just be a boolean flag, but the autotest "proxy" code
    # can't handle flags that don't take arguments.
    parser.add_argument(
            "-n", "--no_wait", dest="no_wait", default=False, type=bool_str,
            help='Must pass "True" or "False" if used.')
    # If you really want no pool, --pool="" will do it. USE WITH CARE.
    parser.add_argument("-p", "--pool", dest="pool", default="suites")
    parser.add_argument("-s", "--suite_name", dest="name")
    parser.add_argument("-a", "--afe_timeout_mins", type=int,
                        dest="afe_timeout_mins", default=30)
    parser.add_argument("-t", "--timeout_mins", type=int,
                        dest="timeout_mins", default=1440)
    parser.add_argument("-x", "--max_runtime_mins", type=int,
                        dest="max_runtime_mins", default=1440)
    parser.add_argument("-d", "--delay_sec", type=int,
                        dest="delay_sec", default=10)
    parser.add_argument("-m", "--mock_job_id", dest="mock_job_id",
                        help="Attach to existing job id for already running "
                        "suite, and creates report.")
    # NOTE(akeshet): This looks similar to --no_wait, but behaves differently.
    # --no_wait is passed in to the suite rpc itself and affects the suite,
    # while this does not.
    parser.add_argument("-c", "--create_and_return", dest="create_and_return",
                        action="store_true",
                        help="Create the suite and print the job id, then "
                        "finish immediately.")
    parser.add_argument("-u", "--num", dest="num", type=int, default=None,
                        help="Run on at most NUM machines.")
    # Same boolean flag issue applies here.
    parser.add_argument(
            "-f", "--file_bugs", dest="file_bugs", default=False,
            type=bool_str,
            help=('File bugs on test failures. Must pass "True" or '
                  '"False" if used.'))
    parser.add_argument("-l", "--bypass_labstatus", dest="bypass_labstatus",
                        action="store_true", help='Bypass lab status check.')
    # We allow either a number or a string for the priority.  This way, if you
    # know what you're doing, one can specify a custom priority level between
    # other levels.
    parser.add_argument("-r", "--priority", dest="priority",
                        type=_get_priority_value,
                        default=priorities.Priority.DEFAULT,
                        action="store",
                        help="Priority of suite. Either numerical value, or "
                        "one of (" + ", ".join(priorities.Priority.names)
                        + ").")
    parser.add_argument(
            '--retry', dest='retry', default=False, type=bool_str,
            action='store',
            help='Enable test retry.  Must pass "True" or "False" if used.')
    # The first help fragment ends with a space so the two literals do not
    # run together as "retriesallowed" in --help output.
    parser.add_argument('--max_retries', dest='max_retries', default=None,
                        type=int, action='store', help='Maximum retries '
                        'allowed at suite level. No limit if not specified.')
    parser.add_argument('--minimum_duts', dest='minimum_duts', type=int,
                        default=0, action='store',
                        help='Check that the pool has at least such many '
                        'healthy machines, otherwise suite will not run. '
                        'Default to 0.')
    parser.add_argument('--suite_min_duts', dest='suite_min_duts', type=int,
                        default=0, action='store',
                        help='Preferred minimum number of machines. Scheduler '
                        'will prioritize on getting such many machines for '
                        'the suite when it is competing with another suite '
                        'that has a higher priority but already got minimum '
                        'machines it needs. Default to 0.')
    parser.add_argument("--suite_args", dest="suite_args",
                        default=None, action="store",
                        help="Argument string for suite control file.")
    parser.add_argument('--offload_failures_only',
                        dest='offload_failures_only', type=bool_str,
                        action='store', default=False,
                        help='Only enable gs_offloading for failed tests. '
                        'Successful tests will be deleted. Must pass "True"'
                        ' or "False" if used.')
    parser.add_argument('--use_suite_attr', dest='use_suite_attr',
                        action='store_true', default=False,
                        help='Advanced. Run the suite based on ATTRIBUTES of '
                        'control files, rather than SUITE.')
    parser.add_argument('--json_dump', dest='json_dump', action='store_true',
                        default=False,
                        help='Dump the output of run_suite to stdout.')
    parser.add_argument(
            '--run_prod_code', dest='run_prod_code',
            action='store_true', default=False,
            help='Run the test code that lives in prod aka the test '
            'code currently on the lab servers.')
    parser.add_argument(
            '--delay_minutes', type=int, default=0,
            help=('Delay the creation of test jobs for a given '
                  'number of minutes. This argument can be used to '
                  'force provision jobs being delayed, which helps '
                  'to distribute loads across devservers.'))
    parser.add_argument(
            '--skip_duts_check', dest='skip_duts_check', action='store_true',
            default=False, help='If True, skip minimum available DUTs check')
    return parser

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

266

267

Allen Li

2016-09-14 14:44:59 -0700

[diff] [blame]

268

def verify_options(options):
    """Verify the validity of options.

    Checks run in order and stop at the first failure, printing a message
    that describes the problem to stdout.

    Side effect: when verification passes and `options.test_source_build`
    is unset, it is defaulted to `options.build`.

    @param options: The parsed options to verify.

    @returns: True if verification passes, False otherwise.

    """
    # Attaching to an existing job (-m) needs the same build/board/suite
    # that created it; give a message specific to that mode.
    if options.mock_job_id and (
            not options.build or not options.name or not options.board):
        print('When using -m, need to specify build, board and suite '
              'name which you have used for creating the original job')
        return False
    if not options.build:
        print('Need to specify which build to use')
        return False
    if not options.board:
        print('Need to specify board')
        return False
    if not options.name:
        print('Need to specify suite name')
        return False
    if options.num is not None and options.num < 1:
        print('Number of machines must be more than 0, if specified.')
        return False
    if not options.retry and options.max_retries is not None:
        print('max_retries can only be used with --retry=True')
        return False
    if options.use_suite_attr and options.suite_args is not None:
        print('The new suite control file cannot parse the suite_args: %s. '
              'Please do not specify any suite_args here.'
              % (options.suite_args,))
        return False
    if options.no_wait and options.retry:
        # Reported only; this combination does not fail verification.
        print('Test retry is not available when using --no_wait=True')
    # Default to use the test code in CrOS build.
    if not options.test_source_build and options.build:
        options.test_source_build = options.build
    return True

Shuqian Zhao

2015-06-02 11:12:28 -0700

[diff] [blame]

309

def change_options_for_suite_attr(options):
    """Change options to be prepared to run the suite_attr_wrapper.

    If specify 'use_suite_attr' from the cmd line, it indicates to run the
    new style suite control file, suite_attr_wrapper. Then, change the
    options.name to 'suite_attr_wrapper', change the options.suite_args to
    include the arguments needed by suite_attr_wrapper.

    The options object is modified in place; any previous value of
    options.suite_args is overwritten.

    @param options: The verified options. options.name is either a single
                    suite name string or an iterable of suite names.

    @returns: The changed options (the same object that was passed in).

    """
    # Convert the suite_name to attribute boolean expression.
    # isinstance (rather than an exact type check) keeps str subclasses from
    # being iterated character by character.
    if isinstance(options.name, str):
        attr_filter_val = 'suite:%s' % options.name
    else:
        attr_filter_val = ' or '.join('suite:%s' % x for x in options.name)

    # suite_args becomes the string form of a dict of arguments for
    # suite_attr_wrapper; 'attr_filter' is the only key set here.
    options.suite_args = str({'attr_filter': attr_filter_val})
    options.name = 'suite_attr_wrapper'
    return options

Allen Li

2016-09-02 11:52:34 -0700

[diff] [blame]

338

class TestResult(object):

    """Name, status, reason and retry count taken from one TestView."""

    # Display tag for the statuses that are not shown as failures; every
    # other status falls back to the FAILED tag in _pretty_status.
    _PRETTY_STATUS_MAP = {
        'GOOD': '[ PASSED ]',
        'TEST_NA': '[ INFO ]',
    }

    def __init__(self, test_view, retry_count=0):
        """Copy the fields of interest out of a test view.

        @param test_view: TestView instance.
        @param retry_count: Retry count for test.  Optional.
        """
        self.retry_count = retry_count
        self.name = test_view.get_testname()
        self.status = test_view['status']
        self.reason = test_view['reason']


    @property
    def _pretty_status(self):
        """Display tag corresponding to self.status."""
        fallback_tag = '[ FAILED ]'
        return self._PRETTY_STATUS_MAP.get(self.status, fallback_tag)


    def log_using(self, log_function, name_column_width):
        """Emit this result through log_function.

        One line with the padded name and the status tag is always
        emitted.  A status/reason line follows for any status other than
        GOOD, and a retry line follows when retry_count is positive.

        @param log_function: Log function to use.  Example: logging.info
        @param name_column_width: Width of name column for formatting.
        """
        name_cell = self.name.ljust(name_column_width)
        log_function('%s%s', name_cell, self._pretty_status)
        is_good = self.status == 'GOOD'
        if not is_good:
            log_function('%s %s: %s', name_cell, self.status, self.reason)
        if self.retry_count > 0:
            log_function('%s retry_count: %s', name_cell, self.retry_count)

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

375

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

376

Shuqian Zhao

2015-09-29 14:19:28 -0700

[diff] [blame]

377

def get_original_suite_name(suite_name, suite_args):
    """Recover the real suite name behind a suite_attr_wrapper run.

    For any suite other than suite_attr_wrapper the name is returned as
    is.  For suite_attr_wrapper, suite_args is parsed as a dict literal and
    its 'attr_filter' entry is split on parentheses and spaces; the first
    'suite:<name>' token found supplies the name.  If there is no such
    token, suite_name is returned unchanged.

    @param suite_name: the name of the suite launched in afe. When it is
                       suite_attr_wrapper, the suite that actually running is
                       specified in the suite_args.
    @param suite_args: the parsed option which contains the original suite
                       name.

    @returns: the original suite name.

    """
    if suite_name != 'suite_attr_wrapper':
        return suite_name

    attr_filter = ast.literal_eval(suite_args).get('attr_filter', '')
    marker = 'suite:'
    for token in re.split('[() ]', attr_filter):
        if token.startswith(marker):
            return token[len(marker):]
    return suite_name

Craig Harrison

2012-08-23 16:48:49 -0700

[diff] [blame]

396

class LogLink(object):
    """Everything needed to write a link for a job into the logs.

    A LogLink always carries the URL of the job's log files.  When bug
    information was supplied at construction time it can additionally
    produce a link to the bug filed for the failure.

    @var anchor The link text.
    @var url The link url.
    @var bug_id Id of a bug to link to, or None.
    """

    # A list of tests that don't get retried so skip the dashboard.
    _SKIP_RETRY_DASHBOARD = ['provision']

    # Prefixes placed in square brackets at the start of anchor texts.
    _BUG_LINK_PREFIX = 'Auto-Bug'
    _LOG_LINK_PREFIX = 'Test-Logs'


    @classmethod
    def get_bug_link(cls, bug_id):
        """Build the link for a bug id.

        @param bug_id: The id of the bug.
        @return: A link, eg: https://crbug.com/<bug_id>.
        """
        return reporting_utils.link_crbug(bug_id)


    def __init__(self, anchor, server, job_string, bug_info=None, reason=None,
                 retry_count=0, testname=None):
        """Store the link details and build the log URL.

        @param anchor      The link text.
        @param server      The hostname of the server this suite ran on.
        @param job_string  The job whose logs we'd like to link to.
        @param bug_info    Info about the bug, if one was filed.  Unpacked
                           as a (bug_id, bug_count) pair.
        @param reason      A string representing the reason of failure if any.
        @param retry_count How many times the test has been retried.
        @param testname    Optional Arg that supplies the testname.
        """
        self.anchor = anchor
        self.url = _URL_PATTERN % (server, job_string)
        self.reason = reason
        self.retry_count = retry_count
        self.testname = testname
        self.bug_id, self.bug_count = bug_info if bug_info else (None, None)


    @property
    def bug_url(self):
        """URL of associated bug, or None when there is no bug id."""
        return reporting_utils.link_crbug(self.bug_id) if self.bug_id else None


    @property
    def _bug_count_text(self):
        """Return bug count as human friendly text."""
        if self.bug_count is None:
            return 'unknown number of reports'
        if self.bug_count == 1:
            return 'new report'
        return '%s reports' % self.bug_count


    def GenerateBuildbotLinks(self):
        """Yield links formatted to meet buildbot expectations.

        This is a generator.  If there is a bug associated with this link,
        a link to the bug is yielded first, followed by a link to the job
        logs; otherwise only the link to the job logs is yielded.

        @return An iterator of links formatted for the buildbot log
                annotator.
        """
        # Details that appear in both the bug anchor and the log anchor.
        shared_details = []
        if self.retry_count > 0:
            shared_details.append('retry_count: %d' % self.retry_count)
        if self.reason:
            shared_details.append(self.reason)

        if self.bug_url:
            # The bug anchor additionally carries the report count.
            bug_details = shared_details + [self._bug_count_text]
            bug_anchor_text = self._format_anchor_text(
                    self._BUG_LINK_PREFIX, bug_details)
            yield annotations.StepLink(bug_anchor_text, self.bug_url)

        log_anchor_text = self._format_anchor_text(
                self._LOG_LINK_PREFIX, shared_details)
        yield annotations.StepLink(log_anchor_text, self.url)


    def _format_anchor_text(self, prefix, info_strings):
        """Compose anchor text from a prefix, the anchor and info strings.

        @param prefix        The prefix of the anchor text.
        @param info_strings  The infos presented in the anchor text.
        @return A anchor_text with the right prefix and info strings.
        """
        text = '[{prefix}]: {anchor}'.format(
                prefix=prefix,
                anchor=self.anchor.strip())
        if info_strings:
            text += ': ' + ', '.join(info_strings)
        return text


    @property
    def text_link(self):
        """Link to the job's logs, for consumption by a human.

        @return A link formatted for human readability.
        """
        return '%s %s' % (self.anchor, self.url)


    def GenerateWmatrixRetryLink(self):
        """Generate a link to the wmatrix retry dashboard.

        @return A link formatted for the buildbot log annotator, or None
                when there is no testname or the test is listed in
                _SKIP_RETRY_DASHBOARD.
        """
        if self.testname and self.testname not in self._SKIP_RETRY_DASHBOARD:
            return annotations.StepLink(
                    text='[Flake-Dashboard]: %s' % self.testname,
                    url=reporting_utils.link_retry_url(self.testname))
        return None

Simran Basi

2015-02-03 15:50:18 -0800

[diff] [blame]

538

539

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

540

class Timings(object):

541

"""Timings for important events during a suite.

542

543

All timestamps are datetime.datetime objects.

544

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

545

@var suite_job_id: the afe job id of the suite job for which

546

we are recording the timing for.

547

@var download_start_time: the time the devserver starts staging

548

the build artifacts. Recorded in create_suite_job.

549

@var payload_end_time: the time when the artifacts only necessary to start

550

installing images onto DUT's are staged.

551

Recorded in create_suite_job.

552

@var artifact_end_time: the remaining artifacts are downloaded after we kick

553

off the reimaging job, at which point we record

554

artifact_end_time. Recorded in dynamic_suite.py.

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

555

@var suite_start_time: the time the suite started.

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

556

@var tests_start_time: the time the first test started running.

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

557

@var tests_end_time: the time the last test finished running.

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

558

"""

beeps

2013-03-22 13:15:49 -0700

[diff] [blame]

559

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

560

def __init__(self, suite_job_id):

561

self.suite_job_id = suite_job_id

562

# Timings related to staging artifacts on devserver.

563

self.download_start_time = None

564

self.payload_end_time = None

565

self.artifact_end_time = None

beeps

2013-03-22 13:15:49 -0700

[diff] [blame]

566

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

567

# The test_start_time, but taken off the view that corresponds to the

568

# suite instead of an individual test.

569

self.suite_start_time = None

beeps

2013-03-22 13:15:49 -0700

[diff] [blame]

570

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

571

# Earliest and Latest tests in the set of TestViews passed to us.

572

self.tests_start_time = None

573

self.tests_end_time = None

574

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

575

Chris Masone

d9f13c5

2012-08-29 10:37:08 -0700

[diff] [blame]

576

def RecordTiming(self, view):

577

"""Given a test report view, extract and record pertinent time info.

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

578

579

get_detailed_test_views() returns a list of entries that provide

580

info about the various parts of a suite run. This method can take

581

any one of these entries and look up timestamp info we might want

582

and record it.

583

Chris Masone

2012-09-06 16:00:07 -0700

[diff] [blame]

584

If timestamps are unavailable, datetime.datetime.min/max will be used.

585

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

586

@param view: A TestView object.

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

587

"""

Chris Masone

2012-09-06 16:00:07 -0700

[diff] [blame]

588

start_candidate = datetime.min

589

end_candidate = datetime.max

590

if view['test_started_time']:

Dan Shi

2014-08-10 23:38:40 -0700

[diff] [blame]

591

start_candidate = time_utils.time_string_to_datetime(

592

view['test_started_time'])

Chris Masone

2012-09-06 16:00:07 -0700

[diff] [blame]

593

if view['test_finished_time']:

Dan Shi

2014-08-10 23:38:40 -0700

[diff] [blame]

594

end_candidate = time_utils.time_string_to_datetime(

595

view['test_finished_time'])

Chris Masone

2012-09-06 16:00:07 -0700

[diff] [blame]

596

Shuqian Zhao

2016-02-24 11:27:26 -0800

[diff] [blame]

597

if view.get_testname() == TestView.SUITE_JOB:

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

598

self.suite_start_time = start_candidate

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

599

else:

600

self._UpdateFirstTestStartTime(start_candidate)

601

self._UpdateLastTestEndTime(end_candidate)

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

602

if view['afe_job_id'] == self.suite_job_id and 'job_keyvals' in view:

Chris Masone

d9f13c5

2012-08-29 10:37:08 -0700

[diff] [blame]

603

keyvals = view['job_keyvals']

Dan Shi

2014-08-10 23:38:40 -0700

[diff] [blame]

604

self.download_start_time = time_utils.time_string_to_datetime(

605

keyvals.get(constants.DOWNLOAD_STARTED_TIME),

606

handle_type_error=True)

beeps

2013-03-22 13:15:49 -0700

[diff] [blame]

607

Dan Shi

2014-08-10 23:38:40 -0700

[diff] [blame]

608

self.payload_end_time = time_utils.time_string_to_datetime(

609

keyvals.get(constants.PAYLOAD_FINISHED_TIME),

610

handle_type_error=True)

beeps

2013-03-22 13:15:49 -0700

[diff] [blame]

611

Dan Shi

2014-08-10 23:38:40 -0700

[diff] [blame]

612

self.artifact_end_time = time_utils.time_string_to_datetime(

613

keyvals.get(constants.ARTIFACT_FINISHED_TIME),

614

handle_type_error=True)

Chris Masone

44e4d6c

2012-08-15 14:25:53 -0700

[diff] [blame]

615

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

616

617

def _UpdateFirstTestStartTime(self, candidate):

618

"""Update self.tests_start_time, iff candidate is an earlier time.

619

620

@param candidate: a datetime.datetime object.

621

"""

622

if not self.tests_start_time or candidate < self.tests_start_time:

623

self.tests_start_time = candidate

624

625

626

def _UpdateLastTestEndTime(self, candidate):

627

"""Update self.tests_end_time, iff candidate is a later time.

628

629

@param candidate: a datetime.datetime object.

630

"""

631

if not self.tests_end_time or candidate > self.tests_end_time:

632

self.tests_end_time = candidate

def __str__(self):

return ('\n'

'Suite timings:\n'

Chris Masone

2012-05-01 16:52:31 -0700

[diff] [blame]

638

'Downloads started at %s\n'

639

'Payload downloads ended at %s\n'

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

640

'Suite started at %s\n'

Chris Masone

2012-05-01 16:52:31 -0700

[diff] [blame]

641

'Artifact downloads ended (at latest) at %s\n'

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

642

'Testing started at %s\n'

Chris Masone

2012-05-01 16:52:31 -0700

[diff] [blame]

643

'Testing ended at %s\n' % (self.download_start_time,

644

self.payload_end_time,

645

self.suite_start_time,

Chris Masone

2012-05-01 16:52:31 -0700

[diff] [blame]

646

self.artifact_end_time,

Chris Masone

2012-04-30 14:35:28 -0700

[diff] [blame]

647

self.tests_start_time,

648

self.tests_end_time))

649

650

beeps

2013-03-22 13:15:49 -0700

[diff] [blame]

651

def SendResultsToStatsd(self, suite, build, board):

652

"""

653

Sends data to statsd.

654

655

1. Makes a data_key of the form: run_suite.$board.$branch.$suite

656

eg: stats/gauges/<hostname>/run_suite/<board>/<branch>/<suite>/

657

2. Computes timings for several start and end event pairs.

Alex Miller

9a1987a

2013-08-21 15:51:16 -0700

[diff] [blame]

658

3. Sends all timing values to statsd.

beeps

2013-03-22 13:15:49 -0700

[diff] [blame]

659

660

@param suite: scheduled suite that we want to record the results of.

661

@param build: the build that this suite ran on.

662

eg: 'lumpy-release/R26-3570.0.0'

663

@param board: the board that this suite ran on.

664

"""

665

if sys.version_info < (2, 7):

666

logging.error('Sending run_suite perf data to statsd requires'

667

'python 2.7 or greater.')

668

return

669

MK Ryu

c9c0c3f

2014-10-27 14:36:01 -0700

[diff] [blame]

670

# Constructs the key used for logging statsd timing data.

671

data_key = utils.get_data_key('run_suite', suite, build, board)

beeps

2013-03-22 13:15:49 -0700

[diff] [blame]

672

673

# Since we don't want to try subtracting corrupted datetime values

Dan Shi

2014-08-10 23:38:40 -0700

[diff] [blame]

674

# we catch TypeErrors in time_utils.time_string_to_datetime and insert

675

# None instead. This means that even if, say,

676

# keyvals.get(constants.ARTIFACT_FINISHED_TIME) returns a corrupt

677

# value the member artifact_end_time is set to None.

beeps

2013-03-22 13:15:49 -0700

[diff] [blame]

678

if self.download_start_time:

679

if self.payload_end_time:

Gabe Black

2015-02-04 23:55:15 -0800

[diff] [blame]

680

autotest_stats.Timer(data_key).send('payload_download_time',

681

(self.payload_end_time -

682

self.download_start_time).total_seconds())

beeps

2013-03-22 13:15:49 -0700

[diff] [blame]

683

684

if self.artifact_end_time:

Gabe Black

2015-02-04 23:55:15 -0800

[diff] [blame]

685

autotest_stats.Timer(data_key).send('artifact_download_time',

686

(self.artifact_end_time -

687

self.download_start_time).total_seconds())

beeps

2013-03-22 13:15:49 -0700

[diff] [blame]

688

689

if self.tests_end_time:

690

if self.suite_start_time:

Gabe Black

2015-02-04 23:55:15 -0800

[diff] [blame]

691

autotest_stats.Timer(data_key).send('suite_run_time',

692

(self.tests_end_time -

693

self.suite_start_time).total_seconds())

beeps

2013-03-22 13:15:49 -0700

[diff] [blame]

694

695

if self.tests_start_time:

Gabe Black

2015-02-04 23:55:15 -0800

[diff] [blame]

696

autotest_stats.Timer(data_key).send('tests_run_time',

697

(self.tests_end_time -

698

self.tests_start_time).total_seconds())

beeps

2013-03-22 13:15:49 -0700

[diff] [blame]

699

beeps

2013-03-22 13:15:49 -0700

[diff] [blame]

700

Alex Miller

2013-10-30 15:18:57 -0700

[diff] [blame]

701

702

def instance_for_pool(pool_name):
    """
    Look up which server should service a suite for the given pool.

    The pool name is used as the key in the 'POOL_INSTANCE_SHARDING' section
    of CONFIG; when there is no entry, _DEFAULT_AUTOTEST_INSTANCE is
    returned.

    @param pool_name: The pool (without the 'pool:' prefix) to schedule the
                      suite against.
    @return: The hostname that should be used to service this suite run.
    """
    sharding_section = 'POOL_INSTANCE_SHARDING'
    return CONFIG.get_config_value(
            sharding_section, pool_name,
            default=_DEFAULT_AUTOTEST_INSTANCE)

713

714

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

715

class TestView(object):
    """Represents a test view and provides a set of helper functions."""


    SUITE_JOB = 'Suite job'
    INFRA_TESTS = ['provision']


    def __init__(self, view, afe_job, suite_name, build, user,
                 solo_test_run=False):
        """Init a TestView object representing a tko test view.

        @param view: A dictionary representing a tko test view.
        @param afe_job: An instance of frontend.afe.models.Job
                        representing the job that kicked off the test.
        @param suite_name: The name of the suite
                           that the test belongs to.
        @param build: The build for which the test is run.
        @param user: The user for which the test is run.
        @param solo_test_run: This is a solo test run not part of a suite.
        """
        self.view = view
        self.afe_job = afe_job
        self.suite_name = suite_name
        self.build = build
        self.is_suite_view = afe_job.parent_job is None and not solo_test_run
        # This is the test name that will be shown in the output.
        self.testname = None
        self.user = user

        # The case that a job was aborted before it got a chance to run
        # usually indicates suite has timed out (unless aborted by user).
        # In this case, the abort reason will be None.
        # Update the reason with proper information.
        if (self.is_relevant_suite_view() and
                not self.get_testname() == self.SUITE_JOB and
                self.view['status'] == 'ABORT' and
                not self.view['reason']):
            self.view['reason'] = 'Timed out, did not run.'


    def __getitem__(self, key):
        """Overload __getitem__ so that we can still use []

        @param key: A key of the tko test view.

        @returns: The value of an attribute in the view.

        """
        return self.view[key]


    def __iter__(self):
        """Overload __iter__ so that it supports 'in' operator."""
        return iter(self.view)


    def _get_job_qualified_test_name(self):
        """Return the raw test name, job-qualified for SERVER/CLIENT_JOB.

        @returns: '<job_name>_<test_name>' when the view's test_name starts
                  with 'SERVER_JOB' or 'CLIENT_JOB'; otherwise the view's
                  test_name unchanged.
        """
        test_name = self.view['test_name']
        if test_name.startswith(('SERVER_JOB', 'CLIENT_JOB')):
            # Append job name as a prefix for SERVER_JOB and CLIENT_JOB
            return '%s_%s' % (self.view['job_name'], test_name)
        return test_name


    def get_testname(self):
        """Get test name that should be shown in the output.

        Formalize the test_name we got from the test view.

        Remove 'build/suite' prefix if any. And append 'experimental' prefix
        for experimental tests if their names do not start with 'experimental'.

        If one runs a test in control file via the following code,
            job.runtest('my_Test', tag='tag')
        for most of the cases, view['test_name'] would look like 'my_Test.tag'.
        If this is the case, this method will just return the original
        test name, i.e. 'my_Test.tag'.

        There are four special cases.
        1) A test view is for the suite job's SERVER_JOB.
           In this case, this method will return 'Suite job'.

        2) A test view is of a child job or a solo test run not part of a
           suite, and for a SERVER_JOB or CLIENT_JOB.
           In this case, we will take the job name, remove the build/suite
           prefix from the job name, and append the rest to 'SERVER_JOB'
           or 'CLIENT_JOB' as a prefix. So the names returned by this
           method will look like:
             'experimental_Telemetry Smoothness Measurement_SERVER_JOB'
             'experimental_dummy_Pass_SERVER_JOB'
             'dummy_Fail_SERVER_JOB'

        3) A test view is of a suite job and its status is ABORT.
           In this case, the view['test_name'] is the child job's name.
           If it is an experimental test, 'experimental' will be part
           of the name. For instance,
             'lumpy-release/R35-5712.0.0/perf_v2/
                   experimental_Telemetry Smoothness Measurement'
             'lumpy-release/R35-5712.0.0/dummy/experimental_dummy_Pass'
             'lumpy-release/R35-5712.0.0/dummy/dummy_Fail'
           The above names will be converted to the following:
             'experimental_Telemetry Smoothness Measurement'
             'experimental_dummy_Pass'
             'dummy_Fail'

        4) A test view is of a suite job and its status is TEST_NA.
           In this case, the view['test_name'] is the NAME field of the control
           file. If it is an experimental test, 'experimental' will be part of
           the name. For instance,
             'experimental_Telemetry Smoothness Measurement'
             'experimental_dummy_Pass'
             'dummy_Fail'
           This method will not modify these names.

        @returns: Test name after normalization.

        """
        # The normalized name is computed once and cached.
        if self.testname is not None:
            return self.testname

        if (self.is_suite_view and
                self.view['test_name'].startswith('SERVER_JOB')):
            # Rename suite job's SERVER_JOB to 'Suite job'.
            self.testname = self.SUITE_JOB
            return self.testname

        testname = self._get_job_qualified_test_name()
        experimental = self.is_experimental()
        # Remove the build and suite name from testname if any.
        testname = tools.get_test_name(
                self.build, self.suite_name, testname)
        # If an experimental test was aborted, testname
        # would include the 'experimental' prefix already.
        prefix = constants.EXPERIMENTAL_PREFIX if (
                experimental and not
                testname.startswith(constants.EXPERIMENTAL_PREFIX)) else ''
        self.testname = prefix + testname
        return self.testname


    def is_relevant_suite_view(self):
        """Checks whether this is a suite view we should care about.

        A view is relevant when it is the suite job's own view, or when it
        belongs to the suite job, is not a CLIENT_JOB view and has no subdir.

        @returns: True if it is relevant. False otherwise.
        """
        return (self.get_testname() == self.SUITE_JOB or
                (self.is_suite_view and
                    not self.view['test_name'].startswith('CLIENT_JOB') and
                    not self.view['subdir']))


    def is_test(self):
        """Return whether the view is for an actual test.

        @returns True if the view is for an actual test.
                 False if the view is for SERVER_JOB or CLIENT_JOB.

        """
        return not (self.view['test_name'].startswith('SERVER_JOB') or
                    self.view['test_name'].startswith('CLIENT_JOB'))


    def is_retry(self):
        """Check whether the view is for a retry.

        @returns: True, if the view is for a retry; False otherwise.

        """
        return self.view['job_keyvals'].get('retry_original_job_id') is not None


    def is_experimental(self):
        """Check whether a test view is for an experimental test.

        @returns: True if it is for an experimental test, False otherwise.

        """
        return (self.view['job_keyvals'].get('experimental') == 'True' or
                tools.get_test_name(self.build, self.suite_name,
                        self.view['test_name']).startswith('experimental'))


    def hit_timeout(self):
        """Check whether the corresponding job has hit its own timeout.

        Note this method should not be called for those test views
        that belongs to a suite job and are determined as irrelevant
        by is_relevant_suite_view.  This is because they are associated
        to the suite job, whose job start/finished time make no sense
        to an irrelevant test view.

        @returns: True if the corresponding afe job has hit timeout.
                  False otherwise.
        """
        if (self.is_relevant_suite_view() and
                self.get_testname() != self.SUITE_JOB):
            # Any relevant suite test view except SUITE_JOB
            # did not hit its own timeout because it was not ever run.
            return False
        start = (datetime.strptime(
                self.view['job_started_time'], time_utils.TIME_FMT)
                if self.view['job_started_time'] else None)
        end = (datetime.strptime(
                self.view['job_finished_time'], time_utils.TIME_FMT)
                if self.view['job_finished_time'] else None)
        if not start or not end:
            return False
        else:
            # Compare the elapsed wall time, in minutes, to the job's limit.
            return ((end - start).total_seconds()/60.0
                        > self.afe_job.max_runtime_mins)


    def is_aborted(self):
        """Check if the view was aborted.

        For suite job and child job test views, we check job keyval
        'aborted_by' and test status.

        For relevant suite job test views, we only check test status
        because the suite job keyval won't make sense to individual
        test views.

        @returns: True if the test was aborted, False otherwise.

        """

        if (self.is_relevant_suite_view() and
                self.get_testname() != self.SUITE_JOB):
            return self.view['status'] == 'ABORT'
        else:
            return (bool(self.view['job_keyvals'].get('aborted_by')) and
                    self.view['status'] in ['ABORT', 'RUNNING'])


    def is_in_fail_status(self):
        """Check if the given test's status corresponds to a failure.

        @returns: True if the test's status is FAIL, ERROR or ABORT.
                  False otherwise.

        """
        # All the statuses tests can have when they fail.
        return self.view['status'] in ['FAIL', 'ERROR', 'ABORT']


    def is_infra_test(self):
        """Check whether this is a test that only lab infra is concerned.

        @returns: True if only lab infra is concerned, False otherwise.

        """
        return self.get_testname() in self.INFRA_TESTS


    def get_buildbot_link_reason(self):
        """Generate the buildbot link reason for the test.

        @returns: A string representing the reason: '<status>: <reason>'
                  when a reason is present, otherwise just the status.

        """
        return ('%s: %s' % (self.view['status'], self.view['reason'])
                if self.view['reason'] else self.view['status'])


    def get_job_id_owner_str(self):
        """Generate the job_id_owner string for a test.

        @returns: A string which looks like 135036-username

        """
        return '%s-%s' % (self.view['afe_job_id'], self.user)


    def get_bug_info(self, suite_job_keyvals):
        """Get the bug info from suite_job_keyvals.

        If a bug has been filed for the test, its bug info (bug id and counts)
        will be stored in the suite job's keyvals. This method attempts to
        retrieve bug info of the test from |suite_job_keyvals|. It will return
        None if no bug info is found. No need to check bug info if the view is
        SUITE_JOB.

        @param suite_job_keyvals: The job keyval dictionary of the suite job.
                All the bug info about child jobs are stored in
                suite job's keyvals.

        @returns: None if there is no bug info, or a pair with the
                  id of the bug, and the count of the number of
                  times the bug has been seen.

        """
        if self.get_testname() == self.SUITE_JOB:
            return None

        return tools.get_test_failure_bug_info(
                suite_job_keyvals, self.view['afe_job_id'],
                self._get_job_qualified_test_name())


    def should_display_buildbot_link(self):
        """Check whether a buildbot link should show for this view.

        For suite job view, show buildbot link if it fails.
        For normal test view,
            show buildbot link if it is a retry
            show buildbot link if it hits its own timeout.
            show buildbot link if it fails. This doesn't
            include the case where it was aborted but has
            not hit its own timeout (most likely it was aborted because
            suite has timed out).

        @returns: True if we should show the buildbot link.
                  False otherwise.
        """
        is_bad_status = self.view['status'] not in ('GOOD', 'TEST_NA')
        if self.get_testname() == self.SUITE_JOB:
            return is_bad_status
        if self.is_retry():
            return True
        if is_bad_status:
            return not self.is_aborted() or self.hit_timeout()
        # A non-retry test in a good status gets no link; say so explicitly
        # rather than falling off the end and returning None.
        return False


    def get_control_file_attributes(self):
        """Get the attributes from the control file of the test.

        @returns: A list of test attribute or None.
        """
        control_file = self.afe_job.control_file
        attributes = None
        if control_file:
            cd = control_data.parse_control_string(control_file)
            attributes = list(cd.attributes)
        return attributes

Allen Li

2016-09-14 19:05:47 -0700

[diff] [blame]

1056

def log_buildbot_links(log_func, links):
    """Emit every buildbot link, plus any retry-dashboard link, to a logger.

    For each entry, all links from GenerateBuildbotLinks() are logged first,
    followed by the GenerateWmatrixRetryLink() result when it is truthy.

    @param log_func: Logging function to use.
    @param links: Iterable of LogLink instances.
    """
    for entry in links:
        for buildbot_link in entry.GenerateBuildbotLinks():
            log_func(buildbot_link)
        retry_link = entry.GenerateWmatrixRetryLink()
        if not retry_link:
            continue
        log_func(retry_link)

1068

1069

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1070

class ResultCollector(object):

Simran Basi

2015-10-14 19:05:00 -0700

[diff] [blame]

1071

"""Collect test results of a suite or a single test run.

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1072

1073

Once a suite job has finished, use this class to collect test results.

1074

`run` is the core method that is to be called first. Then the caller

1075

could retrieve information like return code, return message, is_aborted,

1076

and timings by accessing the collector's public attributes. And output

1077

the test results and links by calling the 'output_*' methods.

1078

1079

Here is an overview of what the `run` method does.

1080

1081

1) Collect the suite job's results from tko_test_view_2.

1082

For the suite job, we only pull test views without a 'subdir'.

1083

A NULL subdir indicates that the test was _not_ executed. This could be

1084

that no child job was scheduled for this test or the child job got

1085

aborted before it started running.

1086

(Note 'SERVER_JOB'/'CLIENT_JOB' are handled specially)

1087

1088

2) Collect the child jobs' results from tko_test_view_2.

1089

For child jobs, we pull all the test views associated with them.

Allen Li

2016-09-14 19:05:47 -0700

[diff] [blame]

1090

(Note 'SERVER_JOB'/'CLIENT_JOB' are handled specially)

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1091

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

1092

3) Generate web and buildbot links.

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1093

4) Compute timings of the suite run.

1094

5) Compute the return code based on test results.

1095

1096

@var _instance_server: The hostname of the server that is used

1097

to service the suite.

1098

@var _afe: The afe rpc client.

1099

@var _tko: The tko rpc client.

1100

@var _build: The build for which the suite is run,

1101

e.g. 'lumpy-release/R35-5712.0.0'

MK Ryu

2014-10-21 11:58:09 -0700

[diff] [blame]

1102

@var _board: The target board for which the suite is run,

1103

e.g., 'lumpy', 'link'.

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1104

@var _suite_name: The suite name, e.g. 'bvt', 'dummy'.

1105

@var _suite_job_id: The job id of the suite for which we are going to

1106

collect results.

Shuqian Zhao

2015-09-29 14:19:28 -0700

[diff] [blame]

1107

@var _original_suite_name: The suite name we record timing would be

1108

different from _suite_name when running

1109

suite_attr_wrapper.

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

1110

@var _suite_views: A list of TestView objects, representing relevant

1111

test views of the suite job.

1112

@var _child_views: A list of TestView objects, representing test views

1113

of the child jobs.

1114

@var _test_views: A list of TestView objects, representing all test views

1115

from _suite_views and _child_views.

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1116

@var _web_links: A list of web links pointing to the results of jobs.

1117

@var _buildbot_links: A list of buildbot links for non-passing tests.

Simran Basi

2015-10-14 19:05:00 -0700

[diff] [blame]

1118

@var _solo_test_run: True if this is a single test run.

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1119

@var return_code: The exit code that should be returned by run_suite.

1120

@var return_message: Any message that should be displayed to explain

1121

the return code.

1122

@var is_aborted: Whether the suite was aborted or not.

1123

True, False or None (aborting status is unknown yet)

1124

@var timings: A Timings object that records the suite's timings.

"""

MK Ryu

2014-10-21 11:58:09 -0700

[diff] [blame]

1129

def __init__(self, instance_server, afe, tko, build, board,
             suite_name, suite_job_id, original_suite_name=None,
             user=None, solo_test_run=False):
    """Store the collector's inputs and reset all collected state.

    @param instance_server: The hostname of the server that is used
                            to service the suite.
    @param afe: The afe rpc client.
    @param tko: The tko rpc client.
    @param build: The build for which the suite is run.
    @param board: The target board for which the suite is run.
    @param suite_name: The suite name.
    @param suite_job_id: The job id of the suite to collect results for.
    @param original_suite_name: Suite name to record timing under; falls
                                back to suite_name when not given.
    @param user: The user for which the tests are run; falls back to
                 getpass.getuser() when not given.
    @param solo_test_run: True if this is a single test run.
    """
    if not original_suite_name:
        original_suite_name = suite_name

    # Inputs describing what ran and where to query for it.
    self._instance_server = instance_server
    self._afe = afe
    self._tko = tko
    self._build = build
    self._board = board
    self._suite_name = suite_name
    self._suite_job_id = suite_job_id
    self._original_suite_name = original_suite_name

    # Collected state; nothing has been gathered yet.
    self._suite_views = []
    self._child_views = []
    self._test_views = []
    self._retry_counts = {}
    self._web_links = []
    self._buildbot_links = []
    self._num_child_jobs = 0

    # Public results, unknown until results are collected.
    self.return_code = None
    self.return_message = ''
    self.is_aborted = None
    self.timings = None

    if not user:
        user = getpass.getuser()
    self._user = user
    self._solo_test_run = solo_test_run

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1153

1154

Allen Li

2016-09-14 19:05:47 -0700

[diff] [blame]

1155

@property
def buildbot_links(self):
    """Read-only public view of the collected buildbot links."""
    links = self._buildbot_links
    return links

1159

1160

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1161

def _fetch_relevant_test_views_of_suite(self):
    """Fetch relevant test views of the suite job.

    For the suite job, there will be a test view for SERVER_JOB, and views
    for results of its child jobs. For example, assume we've created
    a suite job (afe_job_id: 40) that runs dummy_Pass, dummy_Fail,
    dummy_Pass.bluetooth. Assume dummy_Pass was aborted before running while
    dummy_Pass.bluetooth got TEST_NA as no duts have bluetooth.
    So the suite job's test views would look like
    _____________________________________________________________________
    10 | 1000 |SERVER_JOB          |----        |40 |GOOD
    11 | 1000 |dummy_Pass          |NULL        |40 |ABORT
    12 | 1000 |dummy_Fail.Fail     |41-owner/...|40 |FAIL
    13 | 1000 |dummy_Fail.Error    |42-owner/...|40 |ERROR
    14 | 1000 |dummy_Pass.bluetooth|NULL        |40 |TEST_NA

    For a suite job, we only care about
    a) The test view for the suite job's SERVER_JOB
    b) The test views for real tests without a subdir. A NULL subdir
       indicates that a test didn't get executed.
    So, for the above example, we only keep test views whose test_idxs
    are 10, 11, 14.

    @returns: A list of TestView objects, representing relevant
              test views of the suite job.

    """
    suite_job = self._afe.get_jobs(id=self._suite_job_id)[0]
    raw_views = self._tko.run(call='get_detailed_test_views',
                              afe_job_id=self._suite_job_id)
    # Wrap lazily so each view is built and then filtered in turn.
    wrapped = (TestView(raw, suite_job, self._suite_name, self._build,
                        self._user, solo_test_run=self._solo_test_run)
               for raw in raw_views)
    return [candidate for candidate in wrapped
            if candidate.is_relevant_suite_view()]

1199

1200

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

1201

def _compute_retry_count(self, view):

1202

"""Return how many times the test has been retried.

1203

1204

@param view: A TestView instance.

1205

@returns: An int value indicating the retry count.

1206

1207

"""

1208

old_job = view['job_keyvals'].get('retry_original_job_id')

count = 0

while old_job:

count += 1

views = self._tko.run(

1213

call='get_detailed_test_views', afe_job_id=old_job)

1214

old_job = (views[0]['job_keyvals'].get('retry_original_job_id')

if views else None)

return count

Simran Basi

2015-10-14 19:05:00 -0700

[diff] [blame]

1219

def _fetch_test_views_of_child_jobs(self, jobs=None):
    """Fetch test views of child jobs.

    Also records the number of child jobs in self._num_child_jobs when
    there is at least one.

    @param jobs: Optional list of jobs to read test views from. When it
                 is None or empty, the jobs whose parent is the suite
                 job are looked up through the AFE instead.

    @returns: A tuple (child_views, retry_counts)
              child_views is list of TestView objects, representing
              all valid views. retry_counts is a dictionary that maps
              test_idx to retry counts. It only stores retry
              counts that are greater than 0.

    """
    kept_views = []
    retries_by_test_idx = {}
    child_jobs = jobs or self._afe.get_jobs(
            parent_job_id=self._suite_job_id)
    if child_jobs:
        self._num_child_jobs = len(child_jobs)
    for child_job in child_jobs:
        raw_views = self._tko.run(
                call='get_detailed_test_views', afe_job_id=child_job.id,
                invalid=0)
        job_views = [TestView(raw, child_job, self._suite_name,
                              self._build, self._user)
                     for raw in raw_views]
        job_has_failed_test = any(
                view.is_test() and view['status'] != 'GOOD'
                for view in job_views)
        for view in job_views:
            # A normal test view is always kept. A SERVER_JOB or
            # CLIENT_JOB view is kept only if it failed and no real
            # test in the same job failed.
            keep = view.is_test()
            if not keep:
                keep = (view['status'] != 'GOOD'
                        and not job_has_failed_test)
            if not keep:
                continue
            kept_views.append(view)
            retries = self._compute_retry_count(view)
            if retries > 0:
                retries_by_test_idx[view['test_idx']] = retries
    return kept_views, retries_by_test_idx

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1252

1253

1254

def _generate_web_and_buildbot_links(self):
    """Generate web links and buildbot links.

    Rebuilds self._web_links with one LogLink per entry of
    self._test_views, and self._buildbot_links with the subset of those
    links whose view reports should_display_buildbot_link().
    """
    # TODO(fdeng): If a job was aborted before it reaches Running
    # state, we read the test view from the suite job
    # and thus this method generates a link pointing to the
    # suite job's page for the aborted job. Need a fix.
    self._web_links = []
    self._buildbot_links = []
    # Bug info are stored in the suite job's keyvals. A solo test run has
    # no suite job, and a suite run may have produced child views without
    # any relevant suite view; in both cases fall back to empty keyvals
    # instead of indexing into an empty list.
    if self._solo_test_run or not self._suite_views:
        suite_job_keyvals = {}
    else:
        suite_job_keyvals = self._suite_views[0]['job_keyvals']
    for v in self._test_views:
        retry_count = self._retry_counts.get(v['test_idx'], 0)
        bug_info = v.get_bug_info(suite_job_keyvals)
        job_id_owner = v.get_job_id_owner_str()
        link = LogLink(
                anchor=v.get_testname(),
                server=self._instance_server,
                job_string=job_id_owner,
                bug_info=bug_info, retry_count=retry_count,
                testname=v.get_testname())
        self._web_links.append(link)

        if v.should_display_buildbot_link():
            # The same link object is shared by both lists; the reason
            # is only attached when it is shown on buildbot.
            link.reason = v.get_buildbot_link_reason()
            self._buildbot_links.append(link)

1282

1283

1284

def _record_timings(self):
    """Build self.timings for the suite job from all collected views."""
    suite_timings = Timings(self._suite_job_id)
    self.timings = suite_timings
    for view in self._test_views:
        suite_timings.RecordTiming(view)

1289

1290

Fang Deng

2014-05-07 17:17:04 -0700

[diff] [blame]

1291

def _get_return_msg(self, code, tests_passed_after_retry):
    """Map a return code to its human readable explanation.

    @param code: An enum value of RETURN_CODES
    @param tests_passed_after_retry: True/False, indicating
        whether there are test(s) that have passed after retry.
        Only consulted when |code| is RETURN_CODES.WARNING.

    @returns: A string, representing the message. Empty for any code
              not listed below.

    """
    if code == RETURN_CODES.INFRA_FAILURE:
        return 'Suite job failed or provisioning failed.'
    if code == RETURN_CODES.SUITE_TIMEOUT:
        return ('Some test(s) was aborted before running,'
                ' suite must have timed out.')
    if code == RETURN_CODES.WARNING:
        return ('Some test(s) passed after retry.'
                if tests_passed_after_retry
                else 'Some test(s) raised a warning.')
    if code == RETURN_CODES.ERROR:
        return 'Some test(s) failed.'
    return ''

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1317

def _compute_return_code(self):
    """Compute the exit code based on test results.

    Every non-experimental view in self._test_views is mapped to a
    return code and the worst one (per get_worse_code) is stored in
    self.return_code; the matching text goes to self.return_message.
    """
    worst_code = RETURN_CODES.OK
    saw_passing_retry = False

    for view in self._test_views:
        # The order of checking each case is important.
        if view.is_experimental():
            continue
        # Anything not matched below counts as OK.
        view_code = RETURN_CODES.OK
        if view.get_testname() == TestView.SUITE_JOB:
            if view.is_aborted() and view.hit_timeout():
                view_code = RETURN_CODES.SUITE_TIMEOUT
            elif view.is_in_fail_status():
                view_code = RETURN_CODES.INFRA_FAILURE
            elif view['status'] == 'WARN':
                view_code = RETURN_CODES.WARNING
        elif view.is_aborted() and view.is_relevant_suite_view():
            # The test was aborted before started
            # This guarantees that the suite has timed out.
            view_code = RETURN_CODES.SUITE_TIMEOUT
        elif view.is_aborted() and not view.hit_timeout():
            # The test was aborted, but
            # not due to a timeout. This is most likely
            # because the suite has timed out, but may
            # also because it was aborted by the user.
            # Since suite timing out is determined by checking
            # the suite job view, we simply ignore this view here.
            pass
        elif view.is_in_fail_status():
            # The test job failed.
            view_code = (RETURN_CODES.INFRA_FAILURE
                         if view.is_infra_test()
                         else RETURN_CODES.ERROR)
        elif view['status'] == 'WARN':
            # The test/suite job raised a warning.
            view_code = RETURN_CODES.WARNING
        elif view.is_retry():
            # The test is a passing retry.
            view_code = RETURN_CODES.WARNING
            saw_passing_retry = True
        worst_code = get_worse_code(worst_code, view_code)

    self.return_code = worst_code
    self.return_message = self._get_return_msg(
            worst_code, saw_passing_retry)

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1368

1369

Allen Li

2016-09-02 11:52:34 -0700

[diff] [blame]

1370

def _make_test_results(self):
    """Wrap every collected test view in a TestResult.

    The retry count handed to each TestResult is looked up by the
    view's test_idx and defaults to 0.

    @returns: List of TestResult instances.
    """
    retry_counts = self._retry_counts
    return [
        TestResult(test_view=view,
                   retry_count=retry_counts.get(view['test_idx'], 0))
        for view in self._test_views
    ]

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1384

def output_results(self):
    """Output test results, timings and web links.

    Everything is written through logging.info. Safe to call when no
    test view was collected: the result table is then simply empty.
    """
    # Output test results
    test_results = self._make_test_results()
    # max() raises ValueError on an empty sequence, and this method can
    # be reached with no results at all, so only compute the column
    # width when there is something to print.
    if test_results:
        max_name_length = max(len(test_result.name)
                              for test_result in test_results)
    else:
        max_name_length = 0
    for test_result in test_results:
        test_result.log_using(logging.info, max_name_length + 3)
    # Output suite timings
    logging.info(self.timings)
    # Output links to test logs
    logging.info('\nLinks to test logs:')
    for link in self._web_links:
        logging.info(link.text_link)
    logging.info('\n')

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1399

1400

Shuqian Zhao

2015-08-05 22:56:30 -0700

[diff] [blame]

1401

def get_results_dict(self):
    """Collect test results, timings and web links into a dict.

    @returns: A dict of results in the format like:
              {
              'tests': {
                  'test_1': {'status': 'PASSED', 'attributes': [1,2], ...}
                  'test_2': {'status': 'FAILED', 'attributes': [1],...}
              }
              'suite_timings': {
                  'download_start': '1998-07-17 00:00:00',
                  'payload_download_end': '1998-07-17 00:00:05',
                  ...
              }
              'suite_job_id': <id of the suite job>
              }
              'suite_timings' is only present when self.timings is set.
    """
    tests = {}
    results = {'tests': tests}

    for view in self._test_views:
        entry = tests.setdefault(view.get_testname(), {})
        entry['status'] = view['status']
        entry['attributes'] = view.get_control_file_attributes() or list()
        entry['reason'] = view['reason']
        entry['retry_count'] = self._retry_counts.get(view['test_idx'], 0)

    # Attach the log links to the matching entries of |tests|.
    # For test whose status is not 'GOOD', the link is also buildbot_link.
    for link in self._web_links:
        entry = tests.get(link.anchor.strip())
        if not entry:
            # No (non-empty) record for this anchor; nothing to annotate.
            continue
        entry['link_to_logs'] = link.url
        # The wmatrix link is only written for buildbot links.
        if link in self._buildbot_links and link.testname:
            entry['wmatrix_link'] = reporting_utils.link_retry_url(
                    link.testname)
        # The bug url is only written when a bug is attached.
        if link.bug_id:
            entry['bug_url'] = link.bug_url

    # Suite timings, each rendered with str().
    suite_timings = self.timings
    if suite_timings is not None:
        timing_entry = results.setdefault('suite_timings', {})
        timing_entry['download_start'] = str(
                suite_timings.download_start_time)
        timing_entry['payload_download_end'] = str(
                suite_timings.payload_end_time)
        timing_entry['suite_start'] = str(suite_timings.suite_start_time)
        timing_entry['artifact_download_end'] = str(
                suite_timings.artifact_end_time)
        timing_entry['tests_start'] = str(suite_timings.tests_start_time)
        timing_entry['tests_end'] = str(suite_timings.tests_end_time)

    results['suite_job_id'] = self._suite_job_id
    return results

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1462

def run(self):
    """Collect test results.

    This method goes through the following steps:
        Fetch relevant test views of the suite job.
        Fetch test views of child jobs
        Check whether the suite was aborted.
        Generate links.
        Calculate suite timings.
        Compute return code based on the test result.

    When no test view is found at all, the return code is set to
    RETURN_CODES.INFRA_FAILURE and the remaining steps are skipped, so
    is_aborted, the links and the timings keep their previous values.

    """
    if self._solo_test_run:
        # A solo test run has no suite job: the job itself is treated as
        # the only "child" job. The retry counts must land in
        # self._retry_counts, which is what the link and result
        # generation read.
        self._test_views, self._retry_counts = (
                self._fetch_test_views_of_child_jobs(
                        jobs=self._afe.get_jobs(id=self._suite_job_id)))
    else:
        self._suite_views = self._fetch_relevant_test_views_of_suite()
        self._child_views, self._retry_counts = (
                self._fetch_test_views_of_child_jobs())
        self._test_views = self._suite_views + self._child_views
    # For hostless job in Starting status, there is no test view associated.
    # This can happen when a suite job in Starting status is aborted. When
    # the scheduler hits some limit, e.g., max_hostless_jobs_per_drone,
    # max_jobs_started_per_cycle, a suite job can stays in Starting status.
    if not self._test_views:
        self.return_code = RETURN_CODES.INFRA_FAILURE
        self.return_message = 'No test view was found.'
        return
    # The suite counts as aborted when any suite view carries a truthy
    # 'aborted_by' job keyval.
    self.is_aborted = any([view['job_keyvals'].get('aborted_by')
                           for view in self._suite_views])
    self._generate_web_and_buildbot_links()
    self._record_timings()
    self._compute_return_code()

1496

1497

MK Ryu

2014-10-21 11:58:09 -0700

[diff] [blame]

1498

def gather_timing_stats(self):
    """Report the suite's timing statistics.

    Sends self.timings to statsd and records the suite runtime through
    job_overhead.record_suite_runtime.
    """
    suite_timings = self.timings
    # Send timings to statsd.
    suite_timings.SendResultsToStatsd(
            self._original_suite_name, self._build, self._board)

    # Record suite runtime in metadata db.
    # Some failure modes can leave times unassigned, report sentinel value
    # in that case.
    end_time = suite_timings.tests_end_time
    start_time = suite_timings.suite_start_time
    if end_time is None or start_time is None:
        runtime_in_secs = -1
    else:
        runtime_in_secs = (end_time - start_time).total_seconds()

    job_overhead.record_suite_runtime(
            self._suite_job_id, self._suite_name, self._board, self._build,
            self._num_child_jobs, runtime_in_secs)

1515

1516

Allen Li

2016-12-08 13:51:31 -0800

[diff] [blame]

1517

def _make_builds_from_options(options):
    """Create a dict of builds for creating a suite job.

    The returned dict maps version label prefixes to build names. Together,
    each key-value pair describes a complete label. The prefix used for
    options.build depends on which build-name regex it matches (testbed
    first, then Android, otherwise Chrome OS); firmware RW/RO builds are
    added under their own prefixes when given.

    @param options: SimpleNamespace from argument parsing.

    @return: dict mapping version label prefixes to build names
    """
    builds = {}
    os_build = options.build
    if os_build:
        if re.match(ANDROID_TESTBED_BUILD_REGEX, os_build, re.I):
            os_prefix = provision.TESTBED_BUILD_VERSION_PREFIX
        elif re.match(ANDROID_BUILD_REGEX, os_build, re.I):
            os_prefix = provision.ANDROID_BUILD_VERSION_PREFIX
        else:
            os_prefix = provision.CROS_VERSION_PREFIX
        builds[os_prefix] = os_build
    firmware_builds = (
        (provision.FW_RW_VERSION_PREFIX, options.firmware_rw_build),
        (provision.FW_RO_VERSION_PREFIX, options.firmware_ro_build),
    )
    for fw_prefix, fw_build in firmware_builds:
        if fw_build:
            builds[fw_prefix] = fw_build
    return builds

@retry.retry(error.StageControlFileFailure, timeout_min=10)
def create_suite(afe, options):
    """Submit the create_suite_job rpc, retrying on staging failures.

    The timeout and max runtime sent to the server are both extended by
    options.delay_minutes.

    @param afe: The afe object to insert the new suite job into.
    @param options: The options to use in creating the suite.

    @return: The afe_job_id of the new suite job.
    """
    submit_time = diagnosis_utils.JobTimer.format_time(datetime.now())
    logging.info('%s Submitted create_suite_job rpc', submit_time)
    wait_for_suite = not options.no_wait
    rpc_kwargs = dict(
        name=options.name,
        board=options.board,
        build=options.build,
        builds=_make_builds_from_options(options),
        test_source_build=options.test_source_build,
        check_hosts=wait_for_suite,
        pool=options.pool,
        num=options.num,
        file_bugs=options.file_bugs,
        priority=options.priority,
        suite_args=options.suite_args,
        wait_for_results=wait_for_suite,
        timeout_mins=options.timeout_mins + options.delay_minutes,
        max_runtime_mins=options.max_runtime_mins + options.delay_minutes,
        job_retry=options.retry,
        max_retries=options.max_retries,
        suite_min_duts=options.suite_min_duts,
        offload_failures_only=options.offload_failures_only,
        run_prod_code=options.run_prod_code,
        delay_minutes=options.delay_minutes,
    )
    return afe.run('create_suite_job', **rpc_kwargs)

Prashanth B

2014-05-08 18:01:27 -0700

[diff] [blame]

1576

1577

Allen Li

2016-08-16 14:19:08 -0700

[diff] [blame]

1578

# Outcome of handling a suite job: the return code plus the dict of output.
SuiteResult = namedtuple('SuiteResult', ['return_code', 'output_dict'])

1579

1580

Shuqian Zhao

2015-08-05 22:56:30 -0700

[diff] [blame]

1581

def main_without_exception_handling(options):
    """
    run_suite script without exception handling.

    Sets up logging, picks the autotest instance, then either attaches
    to an existing job (options.mock_job_id) or creates a new suite job,
    and finally hands over to _handle_job_nowait / _handle_job_wait.

    @param options: The parsed options.

    @returns: A tuple contains the return_code of run_suite and the dictionary
              of the output.

    @raises utils.TestLabException: if options.mock_job_id does not name
            a retrievable job.
    @raises diagnosis_utils.NotEnoughDutsError: re-raised after a pool
            health bug has been reported.

    """
    # If indicate to use the new style suite control file, convert the args
    if options.use_suite_attr:
        options = change_options_for_suite_attr(options)

    log_name = 'run_suite-default.log'
    if options.build:
        # convert build name from containing / to containing only _
        log_name = 'run_suite-%s.log' % options.build.replace('/', '_')
        log_dir = os.path.join(common.autotest_dir, 'logs')
        if os.path.exists(log_dir):
            log_name = os.path.join(log_dir, log_name)

    utils.setup_logging(logfile=log_name)

    if not options.bypass_labstatus and not options.web:
        utils.check_lab_status(options.build)
    instance_server = (options.web if options.web else
                       instance_for_pool(options.pool))
    afe = frontend_wrappers.RetryingAFE(server=instance_server,
                                        timeout_min=options.afe_timeout_mins,
                                        delay_sec=options.delay_sec)
    logging.info('Autotest instance: %s', instance_server)

    rpc_helper = diagnosis_utils.RPCHelper(afe)
    is_real_time = True
    if options.mock_job_id:
        job_id = int(options.mock_job_id)
        # An already finished job is replayed, not followed in real time.
        existing_job = afe.get_jobs(id=job_id, finished=True)
        if existing_job:
            is_real_time = False
        else:
            existing_job = afe.get_jobs(id=job_id)

        if existing_job:
            job_created_on = time_utils.date_string_to_epoch_time(
                    existing_job[0].created_on)
        else:
            raise utils.TestLabException('Failed to retrieve job: %d' % job_id)
    else:
        try:
            rpc_helper.check_dut_availability(options.board, options.pool,
                                              options.minimum_duts,
                                              options.skip_duts_check)
            job_id = create_suite(afe, options)
            job_created_on = time.time()
        except diagnosis_utils.NotEnoughDutsError as e:
            e.add_suite_name(options.name)
            e.add_build(options.test_source_build)
            pool_health_bug = reporting.PoolHealthBug(e)
            bug_id = reporting.Reporter().report(pool_health_bug).bug_id
            if bug_id is not None:
                logging.info(annotations.StepLink(
                    text='Pool Health Bug',
                    url=reporting_utils.link_crbug(bug_id)))
                e.add_bug_id(bug_id)
            # Bare raise re-raises the same (now annotated) exception
            # object while keeping its original traceback.
            raise
        except (error.CrosDynamicSuiteException,
                error.RPCException, proxy.JSONRPCException) as e:
            logging.exception('Error Message: %s', e)
            return (RETURN_CODES.INFRA_FAILURE, {'return_message': str(e)})
        except AttributeError:
            # NOTE(review): presumably raised when a required option is
            # missing from |options| -- confirm against verify_options.
            return (RETURN_CODES.INVALID_OPTIONS, {})

    job_timer = diagnosis_utils.JobTimer(
            job_created_on, float(options.timeout_mins))
    job_url = reporting_utils.link_job(job_id,
                                       instance_server=instance_server)
    logging.info('%s Created suite job: %s',
                 job_timer.format_time(job_timer.job_created_time),
                 job_url)
    logging.info(annotations.StepLink(
        text='Link to suite',
        url=job_url))

    if options.create_and_return:
        msg = '--create_and_return was specified, terminating now.'
        logging.info(msg)
        return (RETURN_CODES.OK, {'return_message':msg})

    if options.no_wait:
        return _handle_job_nowait(job_id, options, instance_server)
    else:
        return _handle_job_wait(afe, job_id, options, job_timer, is_real_time)

Allen Li

2016-08-16 14:19:08 -0700

[diff] [blame]

1673

1674

1675

def _handle_job_wait(afe, job_id, options, job_timer, is_real_time):
    """Handle suite job synchronously.

    Polls the AFE every 10 seconds until the job is finished, then
    collects results with a ResultCollector, logs them (unless
    options.json_dump is set) and computes the final return code.

    @param afe AFE instance.
    @param job_id Suite job id.
    @param options Parsed options.
    @param job_timer JobTimer for suite job.
    @param is_real_time Whether or not to handle job timeout.

    @return SuiteResult of suite job.
    """
    code = RETURN_CODES.OK
    output_dict = {}
    rpc_helper = diagnosis_utils.RPCHelper(afe)
    instance_server = afe.server
    while not afe.get_jobs(id=job_id, finished=True):
        # Note that this call logs output, preventing buildbot's
        # 9000 second silent timeout from kicking in. Let there be no
        # doubt, this is a hack. The timeout is from upstream buildbot and
        # this is the easiest work around.
        if job_timer.first_past_halftime():
            rpc_helper.diagnose_job(job_id, instance_server)
        if job_timer.debug_output_timer.poll():
            logging.info('The suite job has another %s till timeout.',
                         job_timer.timeout_hours - job_timer.elapsed_time())
        time.sleep(10)
    # For most cases, ResultCollector should be able to determine whether
    # a suite has timed out by checking information in the test view.
    # However, occasionally tko parser may fail on parsing the
    # job_finished time from the job's keyval file. So we add another
    # layer of timeout check in run_suite. We do the check right after
    # the suite finishes to make it as accurate as possible.
    # There is a minor race condition here where we might have aborted
    # for some reason other than a timeout, and the job_timer thinks
    # it's a timeout because of the jitter in waiting for results.
    # The consequence would be that run_suite exits with code
    # SUITE_TIMEOUT while it should have returned INFRA_FAILURE
    # instead, which should happen very rarely.
    # Note the timeout will have no sense when using -m option.
    is_suite_timeout = job_timer.is_suite_timeout()

    # Extract the original suite name to record timing.
    original_suite_name = get_original_suite_name(options.name,
                                                  options.suite_args)
    # Start collecting test results.
    TKO = frontend_wrappers.RetryingTKO(server=instance_server,
                                        timeout_min=options.afe_timeout_mins,
                                        delay_sec=options.delay_sec)
    collector = ResultCollector(instance_server=instance_server,
                                afe=afe, tko=TKO, build=options.build,
                                board=options.board,
                                suite_name=options.name,
                                suite_job_id=job_id,
                                original_suite_name=original_suite_name)
    collector.run()
    # Dump test outputs into json.
    output_dict = collector.get_results_dict()
    output_dict['autotest_instance'] = instance_server
    if not options.json_dump:
        collector.output_results()
    code = collector.return_code
    return_message = collector.return_message
    if is_real_time:
        # Do not record stats if the suite was aborted (either by a user
        # or through the golo rpc).
        # Also do not record stats if is_aborted is None, indicating
        # aborting status is unknown yet.
        # is_aborted is tri-state (True/False/None), hence the identity
        # checks instead of plain truthiness.
        if collector.is_aborted is False:
            collector.gather_timing_stats()

        if collector.is_aborted is True and is_suite_timeout:
            # There are two possible cases when a suite times out.
            # 1. the suite job was aborted due to timing out
            # 2. the suite job succeeded, but some child jobs
            #    were already aborted before the suite job exited.
            # The case 2 was handled by ResultCollector,
            # here we handle case 1.
            old_code = code
            code = get_worse_code(
                    code, RETURN_CODES.SUITE_TIMEOUT)
            if old_code != code:
                return_message = 'Suite job timed out.'
                logging.info('Upgrade return code from %s to %s '
                             'because suite job has timed out.',
                             RETURN_CODES.get_string(old_code),
                             RETURN_CODES.get_string(code))

        logging.info('\nAttempting to display pool info: %s', options.pool)
        try:
            # Add some jitter to make up for any latency in
            # aborting the suite or checking for results.
            cutoff = (job_timer.timeout_hours +
                      timedelta(hours=0.3))
            rpc_helper.diagnose_pool(
                    options.board, options.pool, cutoff)
        except proxy.JSONRPCException:
            # Pool diagnosis is best effort; the suite result stands.
            logging.warning('Unable to display pool info.')

    # And output return message.
    if return_message:
        logging.info('Reason: %s', return_message)
        output_dict['return_message'] = return_message

    logging.info('\nOutput below this line is for buildbot consumption:')
    # Go through the collector's public property rather than its
    # private attribute.
    log_buildbot_links(logging.info, collector.buildbot_links)
    return SuiteResult(code, output_dict)

Prashanth B

2014-03-14 12:36:29 -0700

[diff] [blame]

1781

Allen Li

2016-08-16 14:19:08 -0700

[diff] [blame]

1782

1783

def _handle_job_nowait(job_id, options, instance_server):
    """Report a newly created suite job without waiting for it to finish.

    Logs the job id and the buildbot links generated for the job, then
    returns right away with an OK result.

    @param job_id Suite job id.
    @param options Parsed options.
    @param instance_server Autotest instance hostname.

    @return SuiteResult carrying RETURN_CODES.OK and a return message
            stating that --no_wait was specified.
    """
    exit_message = '--no_wait specified; Exiting.'

    logging.info('Created suite job: %r', job_id)

    # LogLink is handed the job as "<job id>-<current user>".
    suite_name = options.name
    job_string = '%s-%s' % (job_id, getpass.getuser())
    log_link = LogLink(suite_name, instance_server, job_string)
    for buildbot_link in log_link.GenerateBuildbotLinks():
        logging.info(buildbot_link)

    logging.info(exit_message)
    return SuiteResult(RETURN_CODES.OK, {'return_message': exit_message})

Chris Masone

2012-02-14 14:18:01 -0800

[diff] [blame]

1800

Fang Deng

2014-04-07 15:39:47 -0700

[diff] [blame]

1801

Fang Deng

2014-09-18 17:52:06 -0700

[diff] [blame]

1802

def main():
    """Entry point.

    Parses the command line, verifies the options, runs the suite via
    main_without_exception_handling() and converts any exception raised
    along the way into a return code plus a 'return_message' entry in the
    output dictionary. When options.json_dump is set, logging is disabled
    and the output dictionary is written to stdout as JSON wrapped in
    #JSON_START# / #JSON_END# markers.

    @return One of the RETURN_CODES values describing the outcome.
    """
    utils.verify_not_root_user()

    parser = make_parser()
    options = parser.parse_args()
    try:
        # Silence the log when dumping outputs into json
        if options.json_dump:
            logging.disable(logging.CRITICAL)

        if not verify_options(options):
            parser.print_help()
            code = RETURN_CODES.INVALID_OPTIONS
            output_dict = {'return_code': RETURN_CODES.INVALID_OPTIONS}
        else:
            code, output_dict = main_without_exception_handling(options)
    except diagnosis_utils.BoardNotAvailableError as e:
        # Format the exception itself rather than the deprecated
        # BaseException.message attribute, matching the handlers below.
        output_dict = {'return_message': 'Skipping testing: %s' % e}
        code = RETURN_CODES.BOARD_NOT_AVAILABLE
        logging.info(output_dict['return_message'])
    except utils.TestLabException as e:
        output_dict = {'return_message': 'TestLabException: %s' % e}
        code = RETURN_CODES.INFRA_FAILURE
        logging.exception(output_dict['return_message'])
    except Exception as e:
        # Top-level boundary: anything unexpected is reported as an
        # infrastructure failure instead of escaping as a traceback.
        output_dict = {
            'return_message': 'Unhandled run_suite exception: %s' % e
        }
        code = RETURN_CODES.INFRA_FAILURE
        logging.exception(output_dict['return_message'])

    # Dump test outputs into json.
    output_dict['return_code'] = code
    if options.json_dump:
        output_json = json.dumps(output_dict, sort_keys=True)
        output_json_marked = '#JSON_START#%s#JSON_END#' % output_json.strip()
        sys.stdout.write(output_json_marked)

    logging.info('Will return from run_suite with status: %s',
                 RETURN_CODES.get_string(code))
    autotest_stats.Counter('run_suite.%s' %
                           RETURN_CODES.get_string(code)).increment()
    return code

Fang Deng

2014-09-18 17:52:06 -0700

[diff] [blame]

1846

1847

Chris Masone