Blame - server/cros/dynamic_suite/suite.py - platform/external/autotest

2012-08-15 14:25:53 -0700

[diff] [blame]

1

2

# Use of this source code is governed by a BSD-style license that can be

3

# found in the LICENSE file.

4

Allen Li

ee36ab8

2017-07-07 15:46:40 -0700

[diff] [blame]

5

import abc

Fang Deng

2015-01-02 14:51:49 -0800

[diff] [blame]

6

import datetime

7

import difflib

Allen Li

2017-02-28 18:43:24 -0800

[diff] [blame]

8

import functools

Fang Deng

2015-01-02 14:51:49 -0800

[diff] [blame]

import hashlib

import logging

import operator

import os

import re

Fang Deng

2015-01-02 14:51:49 -0800

[diff] [blame]

14

import sys

Allen Li

2017-02-28 18:43:24 -0800

[diff] [blame]

15

import warnings

Chris Masone

2012-08-15 14:25:53 -0700

[diff] [blame]

import common

J. Richard Barnette

2014-04-02 10:27:33 -0700

[diff] [blame]

19

from autotest_lib.frontend.afe.json_rpc import proxy

Fang Deng

2014-03-17 15:19:46 -0700

[diff] [blame]

20

from autotest_lib.client.common_lib import enum

Dan Shi

dfea368

2014-08-10 23:38:40 -0700

[diff] [blame]

21

from autotest_lib.client.common_lib import error

Simran Basi

5ace6f2

2016-01-06 17:30:44 -0800

[diff] [blame]

22

from autotest_lib.client.common_lib import global_config

Alex Miller

7d658cf

2013-09-04 16:00:35 -0700

[diff] [blame]

23

from autotest_lib.client.common_lib import priorities

Dan Shi

dfea368

2014-08-10 23:38:40 -0700

[diff] [blame]

24

from autotest_lib.client.common_lib import time_utils

25

from autotest_lib.client.common_lib import utils

Xixuan Wu

eb1acc4

2017-11-22 15:46:03 -0800

[diff] [blame]

26

from autotest_lib.frontend.afe import model_attributes

Fang Deng

2014-03-17 15:19:46 -0700

[diff] [blame]

27

from autotest_lib.frontend.afe.json_rpc import proxy

Dan Shi

2014-10-10 13:38:51 -0700

[diff] [blame]

28

from autotest_lib.server.cros import provision

Chris Masone

2012-08-15 14:25:53 -0700

[diff] [blame]

29

from autotest_lib.server.cros.dynamic_suite import constants

30

from autotest_lib.server.cros.dynamic_suite import control_file_getter

31

from autotest_lib.server.cros.dynamic_suite import frontend_wrappers

Alex Miller

2012-12-19 13:38:31 -0800

[diff] [blame]

32

from autotest_lib.server.cros.dynamic_suite import job_status

Xixuan Wu

2018-04-25 17:04:51 -0700

[diff] [blame]

33

from autotest_lib.server.cros.dynamic_suite import suite_common

J. Richard Barnette

e7b98bb

2013-08-21 16:34:16 -0700

[diff] [blame]

34

from autotest_lib.server.cros.dynamic_suite import tools

35

from autotest_lib.server.cros.dynamic_suite.job_status import Status

Chris Masone

2012-08-15 14:25:53 -0700

[diff] [blame]

36

Shuqian Zhao

ab46881

2015-04-08 14:40:38 -0700

[diff] [blame]

37

try:

38

from chromite.lib import boolparse_lib

39

from chromite.lib import cros_logging as logging

40

except ImportError:

41

print 'Unable to import chromite.'

42

print 'This script must be either:'

43

print ' - Be run in the chroot.'

44

print ' - (not yet supported) be run after running '

45

print ' ../utils/build_externals.py'

Fang Deng

2014-03-17 15:19:46 -0700

[diff] [blame]

46

Shuqian Zhao

490f78f

2016-01-20 13:18:40 -0800

[diff] [blame]

47

# Names of suites listed for bug filing (per the constant's name; the consumer
# of this list is outside this part of the file).
_FILE_BUG_SUITES = ['au', 'bvt', 'bvt-cq', 'bvt-inline', 'paygen_au_beta',
                    'paygen_au_canary', 'paygen_au_dev', 'paygen_au_stable',
                    'sanity', 'push_to_prod']

Simran Basi

5ace6f2

2016-01-06 17:30:44 -0800

[diff] [blame]

50

# Value of the 'drone_installation_directory' entry in the SCHEDULER section
# of the global config.
_AUTOTEST_DIR = global_config.global_config.get_config_value(
        'SCHEDULER', 'drone_installation_directory')

Xixuan Wu

2018-04-30 17:17:10 -0700

[diff] [blame]

52

Shuqian Zhao

e33ba4a

2015-09-11 18:51:43 -0700

[diff] [blame]

53

Fang Deng

2014-03-17 15:19:46 -0700

[diff] [blame]

54

class RetryHandler(object):
    """Maintain retry information.

    @var _retry_map: A dictionary that stores retry history.
            The key is afe job id. The value is a dictionary.
            {job_id: {'state':RetryHandler.States, 'retry_max':int}}
            - state:
                The retry state of a job.
                NOT_ATTEMPTED:
                    We haven't done anything about the job.
                ATTEMPTED:
                    We've made an attempt to schedule a retry job. The
                    scheduling may or may not be successful, e.g.
                    it might encounter an rpc error. Note failure
                    in scheduling a retry is different from a retry job
                    failure.
                    For each job, we only attempt to schedule a retry once.
                    For example, assume we have a test with JOB_RETRIES=5 and
                    its second retry job failed. When we attempt to create
                    a third retry job to retry the second, we hit an rpc
                    error. In such case, we will give up on all following
                    retries.
                RETRIED:
                    A retry job has already been successfully
                    scheduled.
            - retry_max:
                The maximum of times the job can still
                be retried, taking into account retries
                that have occurred.
    @var _retry_level: A retry might be triggered only if the result
            is worse than the level.
    @var _max_retries: Maximum retry limit at suite level.
            Regardless how many times each individual test
            has been retried, the total number of retries happening in
            the suite can't exceed _max_retries.
    """

    States = enum.Enum('NOT_ATTEMPTED', 'ATTEMPTED', 'RETRIED',
                       start_value=1, step=1)

    def __init__(self, initial_jobs_to_tests, retry_level='WARN',
                 max_retries=None):
        """Initialize RetryHandler.

        @param initial_jobs_to_tests: A dictionary that maps a job id to
                a ControlData object. This dictionary should contain
                jobs that are originally scheduled by the suite.
        @param retry_level: A retry might be triggered only if the result is
                worse than the level.
        @param max_retries: Integer, maximum total retries allowed
                for the suite. Default to None, no max.
        """
        self._retry_map = {}
        self._retry_level = retry_level
        if max_retries is None:
            # "No max" is modelled as a huge budget. sys.maxint only exists
            # on Python 2, so fall back to sys.maxsize elsewhere.
            max_retries = getattr(sys, 'maxint', sys.maxsize)
        self._max_retries = max_retries
        for job_id, test in initial_jobs_to_tests.items():
            if test.job_retries > 0:
                self._add_job(new_job_id=job_id,
                              retry_max=test.job_retries)
            else:
                logging.debug("Test %s has no retries", test.name)


    def _add_job(self, new_job_id, retry_max):
        """Add a newly-created job to the retry map.

        @param new_job_id: The afe_job_id of a newly created job.
        @param retry_max: The maximum of times that we could retry
                          the test if the job fails.

        @raises ValueError if new_job_id is already in retry map.

        """
        if new_job_id in self._retry_map:
            raise ValueError('add_job called when job is already in retry map.')

        self._retry_map[new_job_id] = {
                'state': self.States.NOT_ATTEMPTED,
                'retry_max': retry_max}


    def _suite_max_reached(self):
        """Return whether maximum retry limit for a suite has been reached."""
        return self._max_retries <= 0


    def add_retry(self, old_job_id, new_job_id):
        """Record a retry.

        Update retry map with the retry information. The new job inherits
        the old job's remaining retry count minus one, and the suite-level
        retry budget is decremented by one.

        @param old_job_id: The afe_job_id of the job that is retried.
        @param new_job_id: The afe_job_id of the retry job.

        @raises KeyError if old_job_id isn't in the retry map.
        @raises ValueError if we have already retried or made an attempt
                to retry the old job, or if new_job_id is already in the
                retry map.

        """
        old_record = self._retry_map[old_job_id]
        if old_record['state'] != self.States.NOT_ATTEMPTED:
            raise ValueError(
                    'We have already retried or attempted to retry job %d' %
                    old_job_id)
        # Register the new job before touching the old record: _add_job
        # raises ValueError on a duplicate id, and in that case neither the
        # old job's state nor the suite budget may change.
        self._add_job(new_job_id=new_job_id,
                      retry_max=old_record['retry_max'] - 1)
        old_record['state'] = self.States.RETRIED
        self._max_retries -= 1


    def set_attempted(self, job_id):
        """Set the state of the job to ATTEMPTED.

        @param job_id: afe_job_id of a job.

        @raises KeyError if job_id isn't in the retry map.
        @raises ValueError if the current state is not NOT_ATTEMPTED.

        """
        current_state = self._retry_map[job_id]['state']
        if current_state != self.States.NOT_ATTEMPTED:
            # We are supposed to retry or attempt to retry each job
            # only once. Raise an error if this is not the case.
            raise ValueError('Unexpected state transition: %s -> %s' %
                             (self.States.get_string(current_state),
                              self.States.get_string(self.States.ATTEMPTED)))
        else:
            self._retry_map[job_id]['state'] = self.States.ATTEMPTED


    def has_following_retry(self, result):
        """Check whether there will be a following retry.

        We have the following cases for a given job id (result.id),
            - no retry map entry -> retry not required, no following retry
            - has retry map entry:
                - already retried -> has following retry
                - has not retried
                    (this branch can be handled by checking
                     should_retry(result))
                    - retry_max == 0 --> the last retry job, no more retry
                    - retry_max > 0
                       - attempted, but has failed in scheduling a
                         following retry due to rpc error  --> no more retry
                       - has not attempted --> has following retry if test
                         failed.

        @param result: A result, encapsulating the status of the job.

        @returns: True, if there will be a following retry.
                  False otherwise.

        """
        return (result.test_executed
                and result.id in self._retry_map
                and (self._retry_map[result.id]['state'] == self.States.RETRIED
                     or self._should_retry(result)))


    def _should_retry(self, result):
        """Check whether we should retry a job based on its result.

        We will retry the job that corresponds to the result
        when all of the following are true.
        a) The test was actually executed, meaning that if
           a job was aborted before it could ever reach the state
           of 'Running', the job will not be retried.
        b) The result is worse than |self._retry_level| which
           defaults to 'WARN'.
        c) The test requires retry, i.e. the job has an entry in the retry map.
        d) We haven't made any retry attempt yet, i.e. state == NOT_ATTEMPTED
           Note that if a test has JOB_RETRIES=5, and the second time
           it was retried it hit an rpc error, we will give up on
           all following retries.
        e) The job has not reached its retry max, i.e. retry_max > 0

        The suite-level retry budget must also not be exhausted.

        @param result: A result, encapsulating the status of the job.

        @returns: True if we should retry the job.

        """
        return (
            result.test_executed
            and result.id in self._retry_map
            and not self._suite_max_reached()
            and result.is_worse_than(
                job_status.Status(self._retry_level, '', 'reason'))
            and self._retry_map[result.id]['state'] == self.States.NOT_ATTEMPTED
            and self._retry_map[result.id]['retry_max'] > 0
        )


    def _should_retry_local_job(self, job_id):
        """Check whether we should retry a job based on information available
        for a local job without a Result object.

        We will retry the job that corresponds to the result
        when all of the following are true.
        a) The test requires retry, i.e. the job has an entry in the retry map.
        b) We haven't made any retry attempt yet for this job, i.e.
           state == NOT_ATTEMPTED
           If the job is aborted, we will give up on all following retries,
           regardless of max_retries.
        c) The job has not reached its retry max, i.e. retry_max > 0

        The suite-level retry budget must also not be exhausted.

        @param job_id: the id for the job, to look up relevant information.

        @returns: True if we should retry the job.

        """
        if self._suite_max_reached():
            logging.debug('suite max_retries reached, not retrying.')
            return False
        if job_id not in self._retry_map:
            logging.debug('job_id not in retry map, not retrying.')
            return False
        record = self._retry_map[job_id]
        if record['state'] != self.States.NOT_ATTEMPTED:
            # Log the state's name rather than its raw integer value.
            logging.debug("job state was %s not 'Not Attempted', not retrying",
                          self.States.get_string(record['state']))
            return False
        if record['retry_max'] <= 0:
            logging.debug('test-level retries exhausted, not retrying')
            return False
        return True


    def job_present(self, job_id):
        """Check whether a job id present in the retry map.

        @param job_id: afe_job_id of a job.

        @returns: A True if the job is present, False if not.
        """
        # Every entry stored by _add_job is a non-empty dict, so plain
        # membership is equivalent to testing the truthiness of the entry.
        return job_id in self._retry_map


    def get_retry_max(self, job_id):
        """Get the maximum times the job can still be retried.

        @param job_id: afe_job_id of a job.

        @returns: An int, representing the maximum times the job can still be
                  retried.
        @raises KeyError if job_id isn't in the retry map.

        """
        return self._retry_map[job_id]['retry_max']

299

300

Allen Li

da198fd

2017-03-29 17:22:13 -0700

[diff] [blame]

301

class _SuiteChildJobCreator(object):
    """Create test jobs for a suite."""

    def __init__(
            self,
            tag,
            builds,
            board,
            afe=None,
            max_runtime_mins=24*60,
            timeout_mins=24*60,
            suite_job_id=None,
            ignore_deps=False,
            extra_deps=(),
            priority=priorities.Priority.DEFAULT,
            offload_failures_only=False,
            test_source_build=None,
            job_keyvals=None,
    ):
        """
        Constructor

        @param tag: a string with which to tag jobs run in this suite.
        @param builds: the builds on which we're running this suite.
        @param board: the board on which we're running this suite.
        @param afe: an instance of AFE as defined in server/frontend.py.
        @param max_runtime_mins: Maximum suite runtime, in minutes.
        @param timeout_mins: Maximum job lifetime, in minutes.
        @param suite_job_id: Job id that will act as parent id to all sub jobs.
                             Default: None
        @param ignore_deps: True if jobs should ignore the DEPENDENCIES
                            attribute and skip applying of dependency labels.
                            (Default:False)
        @param extra_deps: A list of strings which are the extra DEPENDENCIES
                           to add to each test being scheduled.
        @param priority: Integer priority level.  Higher is more important.
        @param offload_failures_only: Only enable gs_offloading for failed
                                      jobs.
        @param test_source_build: Build that contains the server-side test code.
        @param job_keyvals: General job keyvals to be inserted into keyval file,
                            which will be used by tko/parse later.
        """
        self._tag = tag
        self._builds = builds
        self._board = board
        self._afe = afe or frontend_wrappers.RetryingAFE(timeout_min=30,
                                                         delay_sec=10,
                                                         debug=False)
        self._max_runtime_mins = max_runtime_mins
        self._timeout_mins = timeout_mins
        self._suite_job_id = suite_job_id
        self._ignore_deps = ignore_deps
        # Copy into a tuple so later changes to the caller's sequence do not
        # affect this instance.
        self._extra_deps = tuple(extra_deps)
        self._priority = priority
        self._offload_failures_only = offload_failures_only
        self._test_source_build = test_source_build
        self._job_keyvals = job_keyvals


    @property
    def cros_build(self):
        """Return the CrOS build or the first build in the builds dict."""
        # TODO(ayatane): Note that the builds dict isn't ordered.  I'm not
        # sure what the implications of this are, but it's probably not a
        # good thing.
        if provision.CROS_VERSION_PREFIX in self._builds:
            return self._builds[provision.CROS_VERSION_PREFIX]
        # list() keeps this working where dict.values() returns a view
        # (Python 3); an empty builds dict still raises IndexError.
        return list(self._builds.values())[0]


    def create_job(self, test, retry_for=None):
        """
        Thin wrapper around frontend.AFE.create_job().

        @param test: ControlData object for a test to run.
        @param retry_for: If the to-be-created job is a retry for an
                          old job, the afe_job_id of the old job will
                          be passed in as |retry_for|, which will be
                          recorded in the new job's keyvals.
        @returns: A frontend.Job object with an added test_name member.
                  test_name is used to preserve the higher level TEST_NAME
                  name of the job.
        """
        # For a system running multiple suites which share tests, the priority
        # overridden may lead to unexpected scheduling order that adds extra
        # provision jobs.
        test_priority = self._priority
        if utils.is_moblab():
            test_priority = max(self._priority, test.priority)

        # Tests flagged as fast never reboot beforehand and skip reset.
        reboot_before = (model_attributes.RebootBefore.NEVER if test.fast
                         else None)

        test_obj = self._afe.create_job(
            control_file=test.text,
            name=tools.create_job_name(
                    self._test_source_build or self.cros_build,
                    self._tag,
                    test.name),
            control_type=test.test_type.capitalize(),
            meta_hosts=[self._board]*test.sync_count,
            dependencies=self._create_job_deps(test),
            keyvals=self._create_keyvals_for_test_job(test, retry_for),
            max_runtime_mins=self._max_runtime_mins,
            timeout_mins=self._timeout_mins,
            parent_job_id=self._suite_job_id,
            reboot_before=reboot_before,
            run_reset=not test.fast,
            priority=test_priority,
            synch_count=test.sync_count,
            require_ssp=test.require_ssp)

        test_obj.test_name = test.name
        return test_obj


    def _create_job_deps(self, test):
        """Create job deps list for a test job.

        @param test: ControlData object for a test to run.

        @returns: A list of dependency strings.
        """
        if self._ignore_deps:
            job_deps = []
        else:
            job_deps = list(test.dependencies)
        # NOTE(review): indentation was lost in the copy this was restored
        # from; extra deps are assumed to apply even when the test's own
        # DEPENDENCIES are ignored -- confirm against the repository.
        job_deps.extend(self._extra_deps)
        return job_deps


    def _create_keyvals_for_test_job(self, test, retry_for=None):
        """Create keyvals dict for creating a test job.

        @param test: ControlData object for a test to run.
        @param retry_for: If the to-be-created job is a retry for an
                          old job, the afe_job_id of the old job will
                          be passed in as |retry_for|, which will be
                          recorded in the new job's keyvals.
        @returns: A keyvals dict for creating the test job.
        """
        keyvals = {
            constants.JOB_BUILD_KEY: self.cros_build,
            constants.JOB_SUITE_KEY: self._tag,
            constants.JOB_EXPERIMENTAL_KEY: test.experimental,
            constants.JOB_BUILDS_KEY: self._builds
        }
        # test_source_build is saved to job_keyvals so scheduler can retrieve
        # the build name from database when compiling autoserv commandline.
        # This avoid a database change to add a new field in afe_jobs.
        #
        # Only add `test_source_build` to job keyvals if the build is different
        # from the CrOS build or the job uses more than one build, e.g., both
        # firmware and CrOS will be updated in the dut.
        # This is for backwards compatibility, so the update Autotest code can
        # compile an autoserv command line to run in a SSP container using
        # previous builds.
        if (self._test_source_build and
            (self.cros_build != self._test_source_build or
             len(self._builds) > 1)):
            keyvals[constants.JOB_TEST_SOURCE_BUILD_KEY] = \
                    self._test_source_build
            # NOTE(review): indentation was lost in the copy this was
            # restored from; the firmware loop is assumed to sit inside this
            # branch -- confirm against the repository.
            for prefix, build in self._builds.items():
                if prefix == provision.FW_RW_VERSION_PREFIX:
                    keyvals[constants.FWRW_BUILD] = build
                elif prefix == provision.FW_RO_VERSION_PREFIX:
                    keyvals[constants.FWRO_BUILD] = build
        # Add suite job id to keyvals so tko parser can read it from keyval
        # file.
        if self._suite_job_id:
            keyvals[constants.PARENT_JOB_ID] = self._suite_job_id
        # We drop the old job's id in the new job's keyval file so that
        # later our tko parser can figure out the retry relationship and
        # invalidate the results of the old job in tko database.
        if retry_for:
            keyvals[constants.RETRY_ORIGINAL_JOB_ID] = retry_for
        if self._offload_failures_only:
            keyvals[constants.JOB_OFFLOAD_FAILURES_KEY] = True
        # Only the keys named in constants.INHERITED_KEYVALS are copied from
        # the suite-level job keyvals.
        if self._job_keyvals:
            for key in constants.INHERITED_KEYVALS:
                if key in self._job_keyvals:
                    keyvals[key] = self._job_keyvals[key]
        return keyvals

Allen Li

2017-03-10 16:11:53 -0800

[diff] [blame]

483

class _ControlFileRetriever(object):
    """Retrieves control files.

    This returns control data instances, unlike control file getters
    which simply return the control file text contents.
    """

    def __init__(self, cf_getter, forgiving_parser=True, run_prod_code=False,
                 test_args=None):
        """Initialize instance.

        @param cf_getter: a control_file_getter.ControlFileGetter used to list
            and fetch the content of control files
        @param forgiving_parser: If False, will raise ControlVariableExceptions
                                 if any are encountered when parsing control
                                 files. Note that this can raise an exception
                                 for syntax errors in unrelated files, because
                                 we parse them before applying the predicate.
        @param run_prod_code: If true, the retrieved tests will run the test
                              code that lives in prod aka the test code
                              currently on the lab servers by disabling
                              SSP for the discovered tests.
        @param test_args: A dict of args to be seeded in test control file under
                          the name |args_dict|.
        """
        self._cf_getter = cf_getter
        self._forgiving_parser = forgiving_parser
        self._run_prod_code = run_prod_code
        self._test_args = test_args


    def retrieve_for_test(self, test_name):
        """Retrieve a test's control data.

        This ignores forgiving_parser because we cannot return a
        forgiving value.

        @param test_name: Name of test to retrieve.

        @raises ControlVariableException: There is a syntax error in a
                                          control file.

        @returns a ControlData object
        """
        return suite_common.retrieve_control_data_for_test(
                self._cf_getter, test_name)


    def retrieve_for_suite(self, suite_name=''):
        """Scan through all tests and find all tests.

        @param suite_name: If specified, this method will attempt to restrain
                           the search space to just this suite's control files.

        @raises ControlVariableException: If forgiving_parser is False and there
                                          is a syntax error in a control file.

        @returns a dictionary of ControlData objects that based on given
                 parameters.
        """
        tests = suite_common.retrieve_for_suite(
                self._cf_getter, suite_name, self._forgiving_parser,
                self._test_args)
        if self._run_prod_code:
            # Running prod code means turning SSP off on every retrieved
            # test. values() works on both Python 2 and 3.
            for test in tests.values():
                test.require_ssp = False

        return tests

Allen Li

1abded5

2017-03-10 16:37:57 -0800

[diff] [blame]

551

552

Allen Li

122cd09

2017-03-09 15:56:46 -0800

[diff] [blame]

553

def list_all_suites(build, devserver, cf_getter=None):
    """
    Parses all ControlData objects with a SUITE tag and extracts all
    defined suite names.

    @param build: the build on which we're running this suite.
    @param devserver: the devserver which contains the build.
    @param cf_getter: control_file_getter.ControlFileGetter. Defaults to
                      using DevServerGetter.

    @return list of suites
    """
    if cf_getter is None:
        cf_getter = _create_ds_getter(build, devserver)

    suites = set()
    # Accept every test: we want the suite tags of all control files.
    predicate = lambda t: True
    for test in find_and_parse_tests(cf_getter, predicate):
        suites.update(test.suite_tag_parts)
    # The set removes duplicates; the order of the returned list is
    # unspecified.
    return list(suites)

Allen Li

2017-03-09 15:52:33 -0800

[diff] [blame]

575

def test_file_similarity_predicate(test_file_pattern):

576

"""Returns predicate that gets the similarity based on a test's file

577

name pattern.

578

579

Builds a predicate that takes in a parsed control file (a ControlData)

580

and returns a tuple of (file path, ratio), where ratio is the

581

similarity between the test file name and the given test_file_pattern.

582

583

@param test_file_pattern: regular expression (string) to match against

584

control file names.

585

@return a callable that takes a ControlData and and returns a tuple of

586

(file path, ratio), where ratio is the similarity between the

587

test file name and the given test_file_pattern.

588

"""

589

return lambda t: ((None, 0) if not hasattr(t, 'path') else

590

(t.path, difflib.SequenceMatcher(a=t.path,

591

b=test_file_pattern).ratio()))

592

593

Allen Li

b5b4a7a

2017-03-09 15:50:09 -0800

[diff] [blame]

594

def test_name_similarity_predicate(test_name):

595

"""Returns predicate that matched based on a test's name.

596

597

Builds a predicate that takes in a parsed control file (a ControlData)

598

and returns a tuple of (test name, ratio), where ratio is the similarity

599

between the test name and the given test_name.

600

601

@param test_name: the test name to base the predicate on.

602

@return a callable that takes a ControlData and returns a tuple of

603

(test name, ratio), where ratio is the similarity between the

604

test name and the given test_name.

605

"""

606

return lambda t: ((None, 0) if not hasattr(t, 'name') else

607

(t.name,

608

difflib.SequenceMatcher(a=t.name, b=test_name).ratio()))

609

610

Allen Li

e37d6ba

2017-03-09 15:49:25 -0800

[diff] [blame]

611

def matches_attribute_expression_predicate(test_attr_boolstr):
    """Returns predicate that matches based on boolean expression of
    attributes.

    Builds a predicate that takes in a parsed control file (a ControlData)
    and returns True if the test attributes satisfy the given attribute
    boolean expression.

    @param test_attr_boolstr: boolean expression of the attributes to be
                              test, like 'system:all and interval:daily'.

    @return a callable that takes a ControlData and returns True if the test
            attributes satisfy the given boolean expression.
    """
    def _matches_expression(t):
        """Evaluate the boolean expression against t.attributes."""
        return boolparse_lib.BoolstrResult(test_attr_boolstr, t.attributes)

    return _matches_expression

627

628

Allen Li

f29b48a

2017-03-09 15:48:41 -0800

[diff] [blame]

629

def test_file_matches_pattern_predicate(test_file_pattern):

630

"""Returns predicate that matches based on a test's file name pattern.

631

632

Builds a predicate that takes in a parsed control file (a ControlData)

633

and returns True if the test's control file name matches the given

634

regular expression.

635

636

@param test_file_pattern: regular expression (string) to match against

637

control file names.

638

@return a callable that takes a ControlData and and returns

639

True if control file name matches the pattern.

640

"""

641

return lambda t: hasattr(t, 'path') and re.match(test_file_pattern,

t.path)

Allen Li

2017-03-09 15:47:25 -0800

[diff] [blame]

645

def test_name_matches_pattern_predicate(test_name_pattern):

646

"""Returns predicate that matches based on a test's name pattern.

647

648

Builds a predicate that takes in a parsed control file (a ControlData)

649

and returns True if the test name matches the given regular expression.

650

651

@param test_name_pattern: regular expression (string) to match against

652

test names.

653

@return a callable that takes a ControlData and returns

654

True if the name fields matches the pattern.

655

"""

656

return lambda t: hasattr(t, 'name') and re.match(test_name_pattern,

t.name)

Allen Li

2017-03-09 15:46:32 -0800

[diff] [blame]

660

def test_name_equals_predicate(test_name):

661

"""Returns predicate that matched based on a test's name.

662

663

Builds a predicate that takes in a parsed control file (a ControlData)

664

and returns True if the test name is equal to |test_name|.

665

666

@param test_name: the test name to base the predicate on.

667

@return a callable that takes a ControlData and looks for |test_name|

668

in that ControlData's name.

669

"""

670

return lambda t: hasattr(t, 'name') and test_name == t.name

671

672

Allen Li

6e2fa4f

2017-03-09 15:45:43 -0800

[diff] [blame]

673

def name_in_tag_similarity_predicate(name):
    """Returns a scorer rating each suite of a control file against |name|.

    The returned callable takes a parsed control file (a ControlData) and
    walks its suite_tag_parts, pairing every suite name with the difflib
    similarity ratio between that suite name and |name|.

    @param name: the suite name to base the predicate on.
    @return a callable that takes a ControlData and returns a list of tuples
            of (suite name, ratio), one per suite listed in the control
            file. A control file listing no suites yields [(None, 0)].
    """
    def _score_suites(control_data):
        """Scores every suite tag of one ControlData against |name|."""
        scored = []
        for suite in control_data.suite_tag_parts:
            matcher = difflib.SequenceMatcher(a=suite, b=name)
            scored.append((suite, matcher.ratio()))
        if not scored:
            return [(None, 0)]
        return scored

    return _score_suites

691

692

Allen Li

398ddbd

2017-03-09 15:44:25 -0800

[diff] [blame]

693

def name_in_tag_predicate(name):
    """Returns predicate that looks for |name| in a control file's suites.

    This is a thin delegate: the predicate itself is built by
    suite_common.name_in_tag_predicate, which defines the matching rules.

    @param name: the suite name to base the predicate on.
    @return a callable that takes a ControlData and looks for |name| in that
            ControlData object's suite member.
    """
    predicate = suite_common.name_in_tag_predicate(name)
    return predicate

Allen Li

398ddbd

2017-03-09 15:44:25 -0800

[diff] [blame]

704

705

Allen Li

a640d6d

2017-03-09 15:41:35 -0800

[diff] [blame]

706

def create_fs_getter(autotest_dir):
    """Builds a control file getter rooted at a local autotest checkout.

    @param autotest_dir: the place to find autotests.
    @return a FileSystemGetter instance that looks under |autotest_dir|.
    """
    # currently hard-coded places to look for tests.
    directories = []
    for subpath in ('server/site_tests', 'client/site_tests',
                    'server/tests', 'client/tests'):
        directories.append(os.path.join(autotest_dir, subpath))
    return control_file_getter.FileSystemGetter(directories)

716

717

Allen Li

0f91587

2017-02-28 18:51:04 -0800

[diff] [blame]

718

def _create_ds_getter(build, devserver):
    """Builds a control file getter backed by a devserver.

    @param build: the build on which we're running this suite.
    @param devserver: the devserver which contains the build.
    @return a DevServerGetter instance constructed from |build| and
            |devserver|.
    """
    return control_file_getter.DevServerGetter(build, devserver)

725

726

Allen Li

3adae95

2017-03-10 17:18:12 -0800

[diff] [blame]

727

def _non_experimental_tests_predicate(test_data):

728

"""Test predicate for non-experimental tests."""

729

return not test_data.experimental

730

731

Allen Li

0b1fa38

2017-02-28 18:47:16 -0800

[diff] [blame]

732

def find_and_parse_tests(cf_getter, predicate, suite_name='',
                         add_experimental=False, forgiving_parser=True,
                         run_prod_code=False, test_args=None):
    """
    Function to scan through all tests and find eligible tests.

    Retrieves the control files for |suite_name| through |cf_getter|, then
    keeps the ones accepted by |predicate|. Unless |add_experimental| is
    set, tests marked experimental are rejected as well.

    @param cf_getter: a control_file_getter.ControlFileGetter used to list
           and fetch the content of control files
    @param predicate: a function that should return True when run over a
           ControlData representation of a control file that should be in
           this Suite.
    @param suite_name: If specified, this method will attempt to restrain
                       the search space to just this suite's control files.
    @param add_experimental: add tests with experimental attribute set.
    @param forgiving_parser: If False, will raise ControlVariableExceptions
                             if any are encountered when parsing control
                             files. Note that this can raise an exception
                             for syntax errors in unrelated files, because
                             we parse them before applying the predicate.
    @param run_prod_code: If true, the suite will run the test code that
                          lives in prod aka the test code currently on the
                          lab servers by disabling SSP for the discovered
                          tests.
    @param test_args: A dict of args to be seeded in test control file.

    @raises ControlVariableException: If forgiving_parser is False and there
                                      is a syntax error in a control file.

    @return list of ControlData objects that should be run, with control
            file text added in |text| attribute. Filtering and ordering are
            done by suite_common.filter_tests (documented upstream as
            sorting on the TIME setting, slowest test first).
    """
    logging.debug('Getting control file list for suite: %s', suite_name)
    candidates = _ControlFileRetriever(
            cf_getter,
            forgiving_parser=forgiving_parser,
            run_prod_code=run_prod_code,
            test_args=test_args).retrieve_for_suite(suite_name)
    if add_experimental:
        effective_predicate = predicate
    else:
        # Experimental tests are excluded by AND-ing in an extra predicate.
        effective_predicate = _ComposedPredicate(
                [predicate, _non_experimental_tests_predicate])
    return suite_common.filter_tests(candidates, effective_predicate)

Allen Li

0b1fa38

2017-02-28 18:47:16 -0800

[diff] [blame]

778

779

Allen Li

da01219

2017-02-28 18:37:52 -0800

[diff] [blame]

780

def find_possible_tests(cf_getter, predicate, suite_name='', count=10):
    """
    Function to scan through all tests and find possible tests.

    Retrieves the control files for |suite_name| through |cf_getter|, uses
    the given similarity predicate to score each of them, and returns the
    |count| best-scoring names.

    @param cf_getter: a control_file_getter.ControlFileGetter used to list
           and fetch the content of control files
    @param predicate: a function that should return a tuple of (name, ratio)
           (or a list of such tuples) when run over a ControlData
           representation of a control file. `name` is the key to be
           compared, e.g., a suite name or test name. `ratio` is a value
           between [0,1] indicating the similarity of `name` and the value
           to be compared.
    @param suite_name: If specified, this method will attempt to restrain
                       the search space to just this suite's control files.
    @param count: Number of suggestions to return, default to 10.

    @return list of at most |count| names similar to the given test, sorted
            by match ratio, best match first. Placeholder entries whose name
            is None are never returned.
    """
    logging.debug('Getting control file list for suite: %s', suite_name)
    tests = _ControlFileRetriever(cf_getter).retrieve_for_suite(suite_name)
    logging.debug('Parsed %s control files.', len(tests))
    similarities = {}
    for test in tests.itervalues():
        ratios = predicate(test)
        # Some predicates may return a list of tuples, e.g.,
        # name_in_tag_similarity_predicate. Convert all returns to a list.
        if not isinstance(ratios, list):
            ratios = [ratios]
        for name, ratio in ratios:
            # The similarity predicates emit (None, 0) when a control file
            # has nothing to compare. That placeholder is not a real
            # suggestion and would break callers that join the names.
            if name is None:
                continue
            similarities[name] = ratio
    ranked = sorted(similarities.items(), key=operator.itemgetter(1),
                    reverse=True)
    return [name for name, _ in ranked[:count]]

817

818

Allen Li

2017-02-28 18:43:24 -0800

[diff] [blame]

819

def _deprecated_suite_method(func):

820

"""Decorator for deprecated Suite static methods.

821

822

TODO(ayatane): This is used to decorate functions that are called as

823

static methods on Suite.

824

"""

825

@functools.wraps(func)

826

def wrapper(*args, **kwargs):

Shuhei Takahashi

2017-06-14 20:02:26 +0900

[diff] [blame]

827

"""Wraps |func| for warning."""

Brian Norris

32add5d

2017-06-19 11:09:54 -0700

[diff] [blame]

828

warnings.warn('Calling method "%s" from Suite is deprecated' %

829

func.__name__)

Allen Li

2017-02-28 18:43:24 -0800

[diff] [blame]

830

return func(*args, **kwargs)

831

return staticmethod(wrapper)

832

833

Allen Li

2017-03-09 16:01:35 -0800

[diff] [blame]

834

class _BaseSuite(object):

Chris Masone

2012-08-15 14:25:53 -0700

[diff] [blame]

835

"""

836

A suite of tests, defined by some predicate over control file variables.

837

838

Given a place to search for control files and a predicate to match the desired

839

tests, can gather tests and fire off jobs to run them, and then wait for

840

results.

841

842

@var _predicate: a function that should return True when run over a

843

ControlData representation of a control file that should be in

844

this Suite.

845

@var _tag: a string with which to tag jobs run in this suite.

Dan Shi

2014-10-10 13:38:51 -0700

[diff] [blame]

846

@var _builds: the builds on which we're running this suite.

Chris Masone

2012-08-15 14:25:53 -0700

[diff] [blame]

847

@var _afe: an instance of AFE as defined in server/frontend.py.

848

@var _tko: an instance of TKO as defined in server/frontend.py.

849

@var _jobs: currently scheduled jobs, if any.

Fang Deng

2014-03-17 15:19:46 -0700

[diff] [blame]

850

@var _jobs_to_tests: a dictionary that maps job ids to tests represented as

851

ControlData objects.

Fang Deng

2014-03-17 15:19:46 -0700

[diff] [blame]

852

@var _retry: a bool value indicating whether jobs should be retried on

853

failure.

854

@var _retry_handler: a RetryHandler object.

855

Chris Masone

2012-08-15 14:25:53 -0700

[diff] [blame]

856

"""

857

Dan Shi

2014-10-10 13:38:51 -0700

[diff] [blame]

858

Allen Li

2016-12-09 18:04:26 -0800

[diff] [blame]

859

def __init__(
        self,
        tests,
        tag,
        builds,
        board,
        afe=None,
        tko=None,
        pool=None,
        results_dir=None,
        max_runtime_mins=24*60,
        timeout_mins=24*60,
        file_bugs=False,
        suite_job_id=None,
        ignore_deps=False,
        extra_deps=None,
        priority=priorities.Priority.DEFAULT,
        wait_for_results=True,
        job_retry=False,
        max_retries=sys.maxint,
        offload_failures_only=False,
        test_source_build=None,
        job_keyvals=None,
        child_dependencies=(),
        result_reporter=None,
):
    """Initialize instance.

    @param tests: Iterable of tests to run.
    @param tag: a string with which to tag jobs run in this suite.
    @param builds: the builds on which we're running this suite.
    @param board: the board on which we're running this suite.
    @param afe: an instance of AFE as defined in server/frontend.py.
    @param tko: an instance of TKO as defined in server/frontend.py.
    @param pool: Specify the pool of machines to use for scheduling
            purposes.
    @param results_dir: The directory where the job can write results to.
                        This must be set if you want job_id of sub-jobs
                        list in the job keyvals.
    @param max_runtime_mins: Maximum suite runtime, in minutes.
    @param timeout_mins: Maximum job lifetime, in minutes.
    @param file_bugs: True if failures should be reported; consulted by
                      _should_report. (Default: False)
    @param suite_job_id: Job id that will act as parent id to all sub jobs.
                         Default: None
    @param ignore_deps: True if jobs should ignore the DEPENDENCIES
                        attribute and skip applying of dependency labels.
                        (Default:False)
    @param extra_deps: A list of strings which are the extra DEPENDENCIES
                       to add to each test being scheduled. The list is
                       copied, never modified in place.
    @param priority: Integer priority level.  Higher is more important.
    @param wait_for_results: Set to False to run the suite job without
                             waiting for test jobs to finish. Default is
                             True.
    @param job_retry: A bool value indicating whether jobs should be retried
                      on failure. If True, the field 'JOB_RETRIES' in
                      control files will be respected. If False, do not
                      retry.
    @param max_retries: Maximum retry limit at suite level.
                        Regardless how many times each individual test
                        has been retried, the total number of retries
                        happening in the suite can't exceed _max_retries.
                        Default to sys.maxint.
    @param offload_failures_only: Only enable gs_offloading for failed
                                  jobs.
    @param test_source_build: Build that contains the server-side test code.
    @param job_keyvals: General job keyvals to be inserted into keyval file,
                        which will be used by tko/parse later.
    @param child_dependencies: (optional) list of dependency strings
            to be added as dependencies to child jobs.
    @param result_reporter: A _ResultReporter instance to report results. If
            None, an _EmailReporter will be created.
    """

    self.tests = list(tests)
    self._tag = tag
    self._builds = builds
    self._results_dir = results_dir
    self._afe = afe or frontend_wrappers.RetryingAFE(timeout_min=30,
                                                     delay_sec=10,
                                                     debug=False)
    self._tko = tko or frontend_wrappers.RetryingTKO(timeout_min=30,
                                                     delay_sec=10,
                                                     debug=False)
    self._jobs = []
    self._jobs_to_tests = {}

    self._file_bugs = file_bugs
    self._suite_job_id = suite_job_id
    self._job_retry = job_retry
    self._max_retries = max_retries
    # RetryHandler to be initialized in schedule()
    self._retry_handler = None
    self.wait_for_results = wait_for_results
    self._job_keyvals = job_keyvals
    if result_reporter is None:
        self._result_reporter = _EmailReporter(self)
    else:
        self._result_reporter = result_reporter

    # Work on a private copy: appending to the list the caller handed in
    # would leak board/pool/child dependencies back into the caller's data
    # (and accumulate them if the same list is reused for several suites).
    dependencies = [] if extra_deps is None else list(extra_deps)
    dependencies.append(board)
    if pool:
        dependencies.append(pool)
    dependencies.extend(child_dependencies)
    self._dependencies = tuple(dependencies)

    self._job_creator = _SuiteChildJobCreator(
        tag=tag,
        builds=builds,
        board=board,
        afe=afe,
        max_runtime_mins=max_runtime_mins,
        timeout_mins=timeout_mins,
        suite_job_id=suite_job_id,
        ignore_deps=ignore_deps,
        extra_deps=dependencies,
        priority=priority,
        offload_failures_only=offload_failures_only,
        test_source_build=test_source_build,
        job_keyvals=job_keyvals,
    )

Allen Li

da198fd

2017-03-29 17:22:13 -0700

[diff] [blame]

980

Chris Masone

2012-08-15 14:25:53 -0700

[diff] [blame]

981

Allen Li

2017-07-07 16:50:38 -0700

[diff] [blame]

982

def _schedule_test(self, record, test, retry_for=None):
    """Schedule a single test and return the job.

    Schedule a single test by creating a job, and then update relevant
    data structures that are used to keep track of all running jobs.

    Emits a TEST_NA status log entry if it failed to schedule the test due
    to NoEligibleHostException or a non-existent board label.

    Returns a frontend.Job object if the test is successfully scheduled.
    If scheduling failed due to NoEligibleHostException or a non-existent
    board label, returns None.

    @param record: A callable to use for logging.
                   prototype: record(base_job.status_log_entry)
    @param test: ControlData for a test to run.
    @param retry_for: If we are scheduling a test to retry an
                      old job, the afe_job_id of the old job
                      will be passed in as |retry_for|.

    @raises proxy.ValidationError: if job creation failed validation for a
            reason other than a non-existent board label.
    @raises error.RPCException, proxy.JSONRPCException: re-raised when job
            creation fails at the RPC layer; when retrying, the old job is
            first marked as attempted.

    @returns: A frontend.Job object or None
    """
    msg = 'Scheduling %s' % test.name
    if retry_for:
        msg = msg + ', to retry afe job %d' % retry_for
    logging.debug(msg)
    begin_time_str = datetime.datetime.now().strftime(time_utils.TIME_FMT)
    try:
        job = self._job_creator.create_job(test, retry_for=retry_for)
    except (error.NoEligibleHostException, proxy.ValidationError) as e:
        if (isinstance(e, error.NoEligibleHostException)
            or (isinstance(e, proxy.ValidationError)
                and _is_nonexistent_board_error(e))):
            # Treat a dependency on a non-existent board label the same as
            # a dependency on a board that exists, but for which there's no
            # hardware.
            logging.debug('%s not applicable for this board/pool. '
                          'Emitting TEST_NA.', test.name)
            Status('TEST_NA', test.name,
                   'Skipping: test not supported on this board/pool.',
                   begin_time_str=begin_time_str).record_all(record)
            return None
        else:
            # Bare raise (rather than `raise e`) keeps the traceback of the
            # original failure inside create_job.
            raise
    except (error.RPCException, proxy.JSONRPCException):
        if retry_for:
            # Mark that we've attempted to retry the old job.
            logging.debug("RPC exception occurred")
            self._retry_handler.set_attempted(job_id=retry_for)
        raise
    else:
        self._jobs.append(job)
        self._jobs_to_tests[job.id] = test
        if retry_for:
            # A retry job was just created, record it.
            self._retry_handler.add_retry(
                    old_job_id=retry_for, new_job_id=job.id)
            retry_count = (test.job_retries -
                           self._retry_handler.get_retry_max(job.id))
            logging.debug('Job %d created to retry job %d. '
                          'Have retried for %d time(s)',
                          job.id, retry_for, retry_count)
        self._remember_job_keyval(job)
        return job

Fang Deng

2014-03-17 15:19:46 -0700

[diff] [blame]

1046

Allen Li

5b5642f

2017-05-17 17:02:56 -0700

[diff] [blame]

1047

def schedule(self, record):
    """
    Schedule jobs using |self._afe|.

    frontend.Job objects representing each scheduled job will be put in
    |self._jobs|.

    Any exception raised while scheduling is logged and recorded as a FAIL
    status for the suite instead of propagating to the caller.

    @param record: A callable to use for logging.
                   prototype: record(base_job.status_log_entry)
    @returns: The number of tests that were scheduled.
    """
    scheduled_test_names = []
    logging.debug('Discovered %d tests.', len(self.tests))

    Status('INFO', 'Start %s' % self._tag).record_result(record)
    try:
        # Write job_keyvals into keyval file.
        if self._job_keyvals:
            utils.write_keyval(self._results_dir, self._job_keyvals)

        # TODO(crbug.com/730885): This is a hack to protect tests that are
        # not usually retried from getting hit by a provision error when run
        # as part of a suite. Remove this hack once provision is separated
        # out in its own suite.
        self._bump_up_test_retries(self.tests)
        for test in self.tests:
            # _schedule_test returns None for tests it had to skip.
            if self._schedule_test(record, test) is not None:
                scheduled_test_names.append(test.name)

        # Write the num of scheduled tests and name of them to keyval file.
        logging.debug('Scheduled %d tests, writing the total to keyval.',
                      len(scheduled_test_names))
        scheduled_keyvals = self._make_scheduled_tests_keyvals(
                scheduled_test_names)
        utils.write_keyval(self._results_dir, scheduled_keyvals)
    except Exception:
        logging.exception('Exception while scheduling suite')
        Status('FAIL', self._tag,
               'Exception while scheduling suite').record_result(record)

    if not self._job_retry:
        logging.info("Will not retry jobs from suite %s.", self._tag)
    else:
        logging.debug("Initializing RetryHandler for suite %s.", self._tag)
        self._retry_handler = RetryHandler(
                initial_jobs_to_tests=self._jobs_to_tests,
                max_retries=self._max_retries)
        logging.debug("retry map created: %s ",
                      self._retry_handler._retry_map)
    return len(scheduled_test_names)

Aviv Keshet

e9170d9

2013-07-19 11:20:45 -0700

[diff] [blame]

1098

Alex Miller

2012-12-19 13:38:31 -0800

[diff] [blame]

1099

Prathmesh Prabhu

2017-06-08 10:44:52 -0700

[diff] [blame]

1100

def _bump_up_test_retries(self, tests):

1101

"""Bump up individual test retries to match suite retry options."""

1102

if not self._job_retry:

1103

return

1104

1105

for test in tests:

Ilja H. Friedel

dd98c2b

2017-08-31 23:55:40 -0700

[diff] [blame]

1106

# We do honor if a test insists on JOB_RETRIES = 0.

1107

if test.job_retries is None:

Prathmesh Prabhu

2017-06-08 10:44:52 -0700

[diff] [blame]

1108

logging.debug(

Ilja H. Friedel

dd98c2b

2017-08-31 23:55:40 -0700

[diff] [blame]

1109

'Test %s did not request retries, but suite requires '

Prathmesh Prabhu

2017-06-08 10:44:52 -0700

[diff] [blame]

1110

'retries. Bumping retries up to 1. '

1111

'(See crbug.com/730885)',

test.name)

test.job_retries = 1

Allen Li

2016-12-12 18:20:04 -0800

[diff] [blame]

1116

def _make_scheduled_tests_keyvals(self, scheduled_test_names):
    """Make a keyvals dict to write for scheduled test names.

    The dict carries the number of scheduled tests and the repr() of the
    list of their names, under the keys defined in constants.

    @param scheduled_test_names: A list of scheduled test name strings.

    @returns: A keyvals dict.
    """
    keyvals = {}
    keyvals[constants.SCHEDULED_TEST_COUNT_KEY] = len(scheduled_test_names)
    keyvals[constants.SCHEDULED_TEST_NAMES_KEY] = repr(scheduled_test_names)
    return keyvals

Allen Li

2016-12-29 15:12:39 -0800

[diff] [blame]

1129

def _should_report(self, result):

beeps

2013-05-30 11:34:14 -0700

[diff] [blame]

1130

"""

Shuqian Zhao

e33ba4a

2015-09-11 18:51:43 -0700

[diff] [blame]

1131

Returns True if this failure requires to be reported.

beeps

2013-05-30 11:34:14 -0700

[diff] [blame]

1132

1133

@param result: A result, encapsulating the status of the failed job.

Shuqian Zhao

e33ba4a

2015-09-11 18:51:43 -0700

[diff] [blame]

1134

@return: True if we should report this failure.

beeps

2013-05-30 11:34:14 -0700

[diff] [blame]

1135

"""

Alex Miller

fcc119b

2014-01-15 13:54:58 -0800

[diff] [blame]

1136

return (self._file_bugs and result.test_executed and

Fang Deng

d82c1c7

2014-07-29 10:43:01 -0700

[diff] [blame]

1137

not result.is_testna() and

beeps

32fa677

2014-01-28 13:19:53 -0800

[diff] [blame]

1138

result.is_worse_than(job_status.Status('GOOD', '', 'reason')))

beeps

2013-05-30 11:34:14 -0700

[diff] [blame]

1139

1140

Allen Li

cc75229

2017-01-03 12:44:39 -0800

[diff] [blame]

1141

def _has_retry(self, result):

1142

"""

1143

Return True if this result gets to retry.

1144

1145

@param result: A result, encapsulating the status of the failed job.

1146

@return: bool

1147

"""

1148

return (self._job_retry

1149

and self._retry_handler.has_following_retry(result))

1150

1151

Aviv Keshet

3e5ff4a

2017-08-04 14:11:37 -0700

[diff] [blame]

1152

def wait(self, record):
    """
    Polls for the job statuses, using |record| to print status when each
    completes.

    Any exception raised while waiting is logged and recorded as a FAIL
    status for the suite instead of propagating to the caller.

    @param record: callable that records job status.
             prototype:
               record(base_job.status_log_entry)
    """
    waiter = job_status.JobResultWaiter(self._afe, self._tko)
    try:
        if not self._suite_job_id:
            logging.warning('Unknown suite_job_id, falling back to less '
                            'efficient results_generator.')
            jobs = self._jobs
        else:
            # Ask the AFE for every child job of the suite job.
            jobs = self._afe.get_jobs(parent_job_id=self._suite_job_id)
        waiter.add_jobs(jobs)
        for result in waiter.wait_for_results():
            self._handle_result(result=result, record=record, waiter=waiter)
            if self._finished_waiting():
                break
    except Exception:  # pylint: disable=W0703
        logging.exception('Exception waiting for results')
        Status('FAIL', self._tag,
               'Exception waiting for results').record_result(record)

1178

1179

Allen Li

2017-07-10 13:00:31 -0700

[diff] [blame]

1180

def _finished_waiting(self):

1181

"""Return whether the suite is finished waiting for child jobs."""

return False

Prathmesh Prabhu

2017-09-07 17:54:23 +0000

[diff] [blame]

1185

def _handle_result(self, result, record, waiter):

Allen Li

2016-12-29 15:23:01 -0800

[diff] [blame]

1186

"""

Allen Li

2017-07-10 12:53:06 -0700

[diff] [blame]

1187

Handle a test job result.

Allen Li

2016-12-29 15:23:01 -0800

[diff] [blame]

1188

1189

@param result: Status instance for job.

1190

@param record: callable that records job status.

1191

prototype:

1192

record(base_job.status_log_entry)

Allen Li

2017-07-10 11:44:54 -0700

[diff] [blame]

1193

@param waiter: JobResultsWaiter instance.

Jacob Kopczynski

2018-01-10 17:25:38 -0800

[diff] [blame]

1194

1195

@instance_param _result_reporter: _ResultReporter instance.

Allen Li

2016-12-29 15:23:01 -0800

[diff] [blame]

1196

"""

Allen Li

2017-07-10 12:53:06 -0700

[diff] [blame]

1197

self._record_result(result, record)

Prathmesh Prabhu

2017-09-07 12:30:03 -0700

[diff] [blame]

1198

rescheduled = False

xixuan

bf854f8

2017-04-20 10:40:15 -0700

[diff] [blame]

1199

if self._job_retry and self._retry_handler._should_retry(result):

Prathmesh Prabhu

2017-09-07 12:30:03 -0700

[diff] [blame]

1200

rescheduled = self._retry_result(result, record, waiter)

1201

# TODO (crbug.com/751428): If the suite times out before a retry could

1202

# finish, we would lose the chance to report errors from the original

1203

# job.

1204

if self._has_retry(result) and rescheduled:

1205

return

1206

Prathmesh Prabhu

2017-09-07 17:54:23 +0000

[diff] [blame]

1207

if self._should_report(result):

1208

self._result_reporter.report(result)

Allen Li

2016-12-29 15:23:01 -0800

[diff] [blame]

1209

Allen Li

2017-07-10 12:53:06 -0700

[diff] [blame]

1210

def _record_result(self, result, record):

1211

"""

1212

Record a test job result.

1213

1214

@param result: Status instance for job.

1215

@param record: callable that records job status.

1216

prototype:

1217

record(base_job.status_log_entry)

1218

"""

1219

result.record_all(record)

1220

self._remember_job_keyval(result)

1221

1222

1223

def _retry_result(self, result, record, waiter):

1224

"""

1225

Retry a test job result.

1226

1227

@param result: Status instance for job.

1228

@param record: callable that records job status.

1229

prototype:

1230

record(base_job.status_log_entry)

1231

@param waiter: JobResultsWaiter instance.

Prathmesh Prabhu

2017-09-07 12:30:03 -0700

[diff] [blame]

1232

@returns: True if a job was scheduled for retry, False otherwise.

Allen Li

2017-07-10 12:53:06 -0700

[diff] [blame]

1233

"""

1234

test = self._jobs_to_tests[result.id]

1235

try:

Xixuan Wu

163ba1f

2017-12-05 11:03:47 -0800

[diff] [blame]

1236

# It only takes effect for CQ retriable job:

1237

# 1) in first try, test.fast=True.

1238

# 2) in second try, test will be run in normal mode, so reset

1239

# test.fast=False.

1240

test.fast = False

Allen Li

2017-07-10 12:53:06 -0700

[diff] [blame]

1241

new_job = self._schedule_test(

1242

record=record, test=test, retry_for=result.id)

1243

except (error.RPCException, proxy.JSONRPCException) as e:

1244

logging.error('Failed to schedule test: %s, Reason: %s',

1245

test.name, e)

Prathmesh Prabhu

2017-09-07 12:30:03 -0700

[diff] [blame]

1246

return False

Allen Li

2017-07-10 12:53:06 -0700

[diff] [blame]

1247

else:

1248

waiter.add_job(new_job)

Prathmesh Prabhu

2017-09-07 12:30:03 -0700

[diff] [blame]

1249

return bool(new_job)

Allen Li

2017-07-10 12:53:06 -0700

[diff] [blame]

1250

Jacob Kopczynski

2018-01-10 17:25:38 -0800

[diff] [blame]

1251

@property
def jobs(self):
    """Return a new list holding the jobs associated with this suite.

    @returns: array of jobs
    """
    return list(self._jobs)

1257

Allen Li

2017-07-10 12:53:06 -0700

[diff] [blame]

1258

Allen Li

1130898

2016-12-29 16:19:55 -0800

[diff] [blame]

1259

@property
def _should_file_bugs(self):
    """Tell whether failures of this suite should be filed as bugs.

    @returns: bool
    """
    # A bug is filed only when the suite is one of the _FILE_BUG_SUITES;
    # otherwise an email is sent to the owner and cc.
    file_bugs = self._tag in _FILE_BUG_SUITES
    return file_bugs

1268

1269

Alex Miller

2012-12-19 13:38:31 -0800

[diff] [blame]

1270

def abort(self):
    """
    Abort every test job this suite has scheduled.

    Nothing is sent to the AFE when no jobs have been scheduled.
    """
    if not self._jobs:
        return
    scheduled_ids = [scheduled.id for scheduled in self._jobs]
    self._afe.run('abort_host_queue_entries', job__id__in=scheduled_ids)

Chris Masone

2012-08-15 14:25:53 -0700

[diff] [blame]

1277

1278

Allen Li

4df053e

2016-12-29 16:05:41 -0800

[diff] [blame]

1279

def _remember_job_keyval(self, job):

Chris Masone

d9f13c5

2012-08-29 10:37:08 -0700

[diff] [blame]

1280

"""

1281

Record provided job as a suite job keyval, for later referencing.

1282

Allen Li

4df053e

2016-12-29 16:05:41 -0800

[diff] [blame]

1283

@param job: some representation of a job that has the attributes:

1284

id, test_name, and owner

Chris Masone

d9f13c5

2012-08-29 10:37:08 -0700

[diff] [blame]

1285

"""

Allen Li

3cc73cd

2016-12-12 16:02:21 -0800

[diff] [blame]

1286

if self._results_dir and job.id and job.owner and job.test_name:

Chris Masone

2012-08-15 14:25:53 -0700

[diff] [blame]

1287

job_id_owner = '%s-%s' % (job.id, job.owner)

Chris Masone

d9f13c5

2012-08-29 10:37:08 -0700

[diff] [blame]

1288

logging.debug('Adding job keyval for %s=%s',

Chris Sosa

accb5ce

2012-08-30 17:29:15 -0700

[diff] [blame]

1289

job.test_name, job_id_owner)

Chris Masone

2012-08-15 14:25:53 -0700

[diff] [blame]

1290

utils.write_keyval(

1291

self._results_dir,

1292

{hashlib.md5(job.test_name).hexdigest(): job_id_owner})

1293

Dan Shi

d152180

2013-05-24 13:08:37 -0700

[diff] [blame]

1294

Allen Li

2017-03-09 16:01:35 -0800

[diff] [blame]

1295

class Suite(_BaseSuite):
    """
    A suite of tests, defined by some predicate over control file variables.

    Given a place to search for control files and a predicate to match the
    desired tests, can gather tests and fire off jobs to run them, and then
    wait for results.

    @var _predicate: a function that should return True when run over a
         ControlData representation of a control file that should be in
         this Suite.
    @var _tag: a string with which to tag jobs run in this suite.
    @var _builds: the builds on which we're running this suite.
    @var _afe: an instance of AFE as defined in server/frontend.py.
    @var _tko: an instance of TKO as defined in server/frontend.py.
    @var _jobs: currently scheduled jobs, if any.
    @var _jobs_to_tests: a dictionary that maps job ids to tests represented
                         as ControlData objects.
    @var _cf_getter: a control_file_getter.ControlFileGetter
    @var _job_retry: a bool value indicating whether jobs should be retried
                     on failure.
    @var _retry_handler: a RetryHandler object.

    """

    # TODO(ayatane): These methods are kept on the Suite class for
    # backward compatibility.
    find_and_parse_tests = _deprecated_suite_method(find_and_parse_tests)
    find_possible_tests = _deprecated_suite_method(find_possible_tests)
    create_fs_getter = _deprecated_suite_method(create_fs_getter)
    name_in_tag_predicate = _deprecated_suite_method(
            suite_common.name_in_tag_predicate)
    name_in_tag_similarity_predicate = _deprecated_suite_method(
            name_in_tag_similarity_predicate)
    test_name_equals_predicate = _deprecated_suite_method(
            test_name_equals_predicate)
    test_name_matches_pattern_predicate = _deprecated_suite_method(
            test_name_matches_pattern_predicate)
    test_file_matches_pattern_predicate = _deprecated_suite_method(
            test_file_matches_pattern_predicate)
    matches_attribute_expression_predicate = _deprecated_suite_method(
            matches_attribute_expression_predicate)
    test_name_similarity_predicate = _deprecated_suite_method(
            test_name_similarity_predicate)
    test_file_similarity_predicate = _deprecated_suite_method(
            test_file_similarity_predicate)
    list_all_suites = _deprecated_suite_method(list_all_suites)
    get_test_source_build = _deprecated_suite_method(
            suite_common.get_test_source_build)


    @classmethod
    def create_from_predicates(cls, predicates, builds, board, devserver,
                               cf_getter=None, name='ad_hoc_suite',
                               run_prod_code=False, **dargs):
        """
        Create a Suite using the given predicate test filters.

        Uses supplied predicate(s) to instantiate a Suite. Looks for tests in
        |autotest_dir| and will schedule them using |afe|. Pulls control files
        from the default dev server. Results will be pulled from |tko| upon
        completion.

        When no |cf_getter| is supplied, one is created here: a file system
        getter over _AUTOTEST_DIR if |run_prod_code| is true, otherwise a
        devserver getter for the test source build.

        @param predicates: A list of callables that accept ControlData
                           representations of control files. A test will be
                           included in suite if all callables in this list
                           return True on the given control file.
        @param builds: the builds on which we're running this suite. It's a
                       dictionary of version_prefix:build.
        @param board: the board on which we're running this suite.
        @param devserver: the devserver which contains the build.
        @param cf_getter: control_file_getter.ControlFileGetter. Defaults to
                          using DevServerGetter.
        @param name: name of suite. Defaults to 'ad_hoc_suite'
        @param run_prod_code: If true, the suite will run the tests that
                              live in prod aka the test code currently on the
                              lab servers.
        @param **dargs: Any other Suite constructor parameters, as described
                        in Suite.__init__ docstring.
        @return a Suite instance.
        """
        if cf_getter is None:
            if run_prod_code:
                cf_getter = create_fs_getter(_AUTOTEST_DIR)
            else:
                build = suite_common.get_test_source_build(builds, **dargs)
                cf_getter = _create_ds_getter(build, devserver)

        return cls(predicates,
                   name, builds, board, cf_getter, run_prod_code, **dargs)


    @classmethod
    def create_from_name(cls, name, builds, board, devserver, cf_getter=None,
                         **dargs):
        """
        Create a Suite using a predicate based on the SUITE control file var.

        Makes a predicate based on |name| and uses it to instantiate a Suite
        that looks for tests in |autotest_dir| and will schedule them using
        |afe|.  Pulls control files from the default dev server.
        Results will be pulled from |tko| upon completion.

        @param name: a value of the SUITE control file variable to search for.
        @param builds: the builds on which we're running this suite. It's a
                       dictionary of version_prefix:build.
        @param board: the board on which we're running this suite.
        @param devserver: the devserver which contains the build.
        @param cf_getter: control_file_getter.ControlFileGetter. Defaults to
                          using DevServerGetter.
        @param **dargs: Any other Suite constructor parameters, as described
                        in Suite.__init__ docstring.
        @return a Suite instance.
        """
        if cf_getter is None:
            build = suite_common.get_test_source_build(builds, **dargs)
            cf_getter = _create_ds_getter(build, devserver)

        return cls([suite_common.name_in_tag_predicate(name)],
                   name, builds, board, cf_getter, **dargs)


    def __init__(
            self,
            predicates,
            tag,
            builds,
            board,
            cf_getter,
            run_prod_code=False,
            afe=None,
            tko=None,
            pool=None,
            results_dir=None,
            max_runtime_mins=24*60,
            timeout_mins=24*60,
            file_bugs=False,
            suite_job_id=None,
            ignore_deps=False,
            extra_deps=None,
            priority=priorities.Priority.DEFAULT,
            forgiving_parser=True,
            wait_for_results=True,
            job_retry=False,
            max_retries=sys.maxint,
            offload_failures_only=False,
            test_source_build=None,
            job_keyvals=None,
            test_args=None,
            child_dependencies=(),
            result_reporter=None,
    ):
        """
        Constructor

        Finds and parses the tests matching |predicates| through |cf_getter|,
        then hands them, together with the remaining arguments, to the
        _BaseSuite constructor.

        @param predicates: A list of callables that accept ControlData
                           representations of control files. A test will be
                           included in suite if all callables in this list
                           return True on the given control file.
        @param tag: a string with which to tag jobs run in this suite.
        @param builds: the builds on which we're running this suite.
        @param board: the board on which we're running this suite.
        @param cf_getter: a control_file_getter.ControlFileGetter
        @param afe: an instance of AFE as defined in server/frontend.py.
        @param tko: an instance of TKO as defined in server/frontend.py.
        @param pool: Specify the pool of machines to use for scheduling
                purposes.
        @param run_prod_code: If true, the suite will run the test code that
                              lives in prod aka the test code currently on the
                              lab servers.
        @param results_dir: The directory where the job can write results to.
                            This must be set if you want job_id of sub-jobs
                            list in the job keyvals.
        @param max_runtime_mins: Maximum suite runtime, in minutes.
        @param timeout_mins: Maximum job lifetime, in minutes.
        @param file_bugs: Passed through to the _BaseSuite constructor;
                          presumably enables bug filing for failed results
                          (confirm against _BaseSuite).
        @param suite_job_id: Job id that will act as parent id to all sub jobs.
                             Default: None
        @param ignore_deps: True if jobs should ignore the DEPENDENCIES
                            attribute and skip applying of dependency labels.
                            (Default:False)
        @param extra_deps: A list of strings which are the extra DEPENDENCIES
                           to add to each test being scheduled.
        @param priority: Integer priority level.  Higher is more important.
        @param forgiving_parser: Passed through to find_and_parse_tests().
                                 Defaults to True.
        @param wait_for_results: Set to False to run the suite job without
                                 waiting for test jobs to finish. Default is
                                 True.
        @param job_retry: A bool value indicating whether jobs should be retried
                          on failure. If True, the field 'JOB_RETRIES' in
                          control files will be respected. If False, do not
                          retry.
        @param max_retries: Maximum retry limit at suite level.
                            Regardless how many times each individual test
                            has been retried, the total number of retries
                            happening in the suite can't exceed max_retries.
                            Default to sys.maxint.
        @param offload_failures_only: Only enable gs_offloading for failed
                                      jobs.
        @param test_source_build: Build that contains the server-side test code.
        @param job_keyvals: General job keyvals to be inserted into keyval file,
                            which will be used by tko/parse later.
        @param test_args: A dict of args passed all the way to each individual
                          test that will be actually ran.
        @param child_dependencies: (optional) list of dependency strings
                to be added as dependencies to child jobs.
        @param result_reporter: A _ResultReporter instance to report results. If
                None, an _EmailReporter will be created.
        """
        tests = find_and_parse_tests(
                cf_getter,
                _ComposedPredicate(predicates),
                tag,
                forgiving_parser=forgiving_parser,
                run_prod_code=run_prod_code,
                test_args=test_args,
        )
        super(Suite, self).__init__(
                tests=tests,
                tag=tag,
                builds=builds,
                board=board,
                afe=afe,
                tko=tko,
                pool=pool,
                results_dir=results_dir,
                max_runtime_mins=max_runtime_mins,
                timeout_mins=timeout_mins,
                file_bugs=file_bugs,
                suite_job_id=suite_job_id,
                ignore_deps=ignore_deps,
                extra_deps=extra_deps,
                priority=priority,
                wait_for_results=wait_for_results,
                job_retry=job_retry,
                max_retries=max_retries,
                offload_failures_only=offload_failures_only,
                test_source_build=test_source_build,
                job_keyvals=job_keyvals,
                child_dependencies=child_dependencies,
                result_reporter=result_reporter,
        )

Allen Li

2017-03-09 16:23:06 -0800

[diff] [blame]

1535

Allen Li

2017-03-09 16:01:35 -0800

[diff] [blame]

1536

Allen Li

2017-05-24 16:47:37 -0700

[diff] [blame]

1537

class ProvisionSuite(_BaseSuite):
    """
    A suite whose purpose is to provision DUTs.

    Provisioning is achieved by scheduling dummy_Pass tests.
    """


    def __init__(
            self,
            tag,
            builds,
            board,
            devserver,
            num_required,
            num_max=float('inf'),
            cf_getter=None,
            run_prod_code=False,
            test_args=None,
            test_source_build=None,
            **kwargs):
        """
        Constructor

        One dummy test is created per available host matching the suite's
        non-provision dependencies, up to |num_max|. No tests are created
        when 'pool:suites' is among those dependencies.

        @param tag: a string with which to tag jobs run in this suite.
        @param builds: the builds on which we're running this suite.
        @param board: the board on which we're running this suite.
        @param devserver: the devserver which contains the build.
        @param num_required: number of tests that must pass.  This is
                             capped by the number of tests that are run.
        @param num_max: max number of tests to make.  By default there
                        is no cap, a test is created for each eligible host.
        @param cf_getter: a control_file_getter.ControlFileGetter.
        @param run_prod_code: forwarded to _load_dummy_test().
        @param test_args: A dict of args passed all the way to each individual
                          test that will be actually ran.
        @param test_source_build: Build that contains the server-side test code.
        @param kwargs: Various keyword arguments passed to
                       _BaseSuite constructor.
        """
        super(ProvisionSuite, self).__init__(
                tests=[], tag=tag, builds=builds, board=board, **kwargs)
        self._num_successful = 0
        self._num_required = 0
        self.tests = []

        # Dependencies that provisioning does not act on are the ones a
        # host must already satisfy.
        static_deps = []
        for dependency in self._dependencies:
            if not provision.Provision.acts_on(dependency):
                static_deps.append(dependency)

        if 'pool:suites' in static_deps:
            logging.info('Provision suite is disabled on suites pool')
            return

        logging.debug('Looking for hosts matching %r', static_deps)
        matching_hosts = self._afe.get_hosts(
                invalid=False, multiple_labels=static_deps)
        logging.debug('Found %d matching hosts for ProvisionSuite',
                      len(matching_hosts))

        usable_hosts = []
        for host in matching_hosts:
            if host.is_available():
                usable_hosts.append(host)
        logging.debug('Found %d available hosts for ProvisionSuite',
                      len(usable_hosts))

        dummy_test = _load_dummy_test(
                builds, devserver, cf_getter,
                run_prod_code, test_args, test_source_build)
        test_count = min(len(usable_hosts), num_max)
        self.tests = [dummy_test] * test_count
        logging.debug('Made %d tests for ProvisionSuite', len(self.tests))

        self._num_required = min(num_required, len(self.tests))
        logging.debug('Expecting %d tests to pass for ProvisionSuite',
                      self._num_required)


    def _handle_result(self, result, record, waiter):
        """
        Handle a result as the base class does and count the good ones.

        @param result: Status instance for job.
        @param record: callable that records job status.
        @param waiter: JobResultsWaiter instance.
        """
        super(ProvisionSuite, self)._handle_result(result, record, waiter)
        if not result.is_good():
            return
        self._num_successful += 1


    def _finished_waiting(self):
        """Return whether the required number of tests has succeeded."""
        return self._num_successful >= self._num_required

Allen Li

2017-05-24 16:47:37 -0700

[diff] [blame]

1614

1615

1616

def _load_dummy_test(
        builds,
        devserver,
        cf_getter=None,
        run_prod_code=False,
        test_args=None,
        test_source_build=None):
    """
    Retrieve the 'dummy_Pass' test through a control file getter.

    When no |cf_getter| is given, one is built here: a file system getter
    over _AUTOTEST_DIR if |run_prod_code| is true, otherwise a devserver
    getter for the test source build, after staging that build's
    'control_files' artifact on the devserver.

    @param builds: the builds on which we're running this suite.
    @param devserver: the devserver which contains the build.
    @param cf_getter: a control_file_getter.ControlFileGetter.
    @param run_prod_code: if true, read control files from _AUTOTEST_DIR
                          rather than from the devserver.
    @param test_args: A dict of args passed all the way to each individual
                      test that will be actually ran.
    @param test_source_build: Build that contains the server-side test code.
    """
    if cf_getter is None and run_prod_code:
        cf_getter = create_fs_getter(_AUTOTEST_DIR)
    elif cf_getter is None:
        source_build = suite_common.get_test_source_build(
                builds, test_source_build=test_source_build)
        devserver.stage_artifacts(image=source_build,
                                  artifacts=['control_files'])
        cf_getter = _create_ds_getter(source_build, devserver)

    return _ControlFileRetriever(
            cf_getter,
            run_prod_code=run_prod_code,
            test_args=test_args).retrieve_for_test('dummy_Pass')

Allen Li