Blame - server/cros/dynamic_suite/suite.py - platform/external/autotest

2012-08-15 14:25:53 -0700

[diff] [blame]

1

2

# Use of this source code is governed by a BSD-style license that can be

3

# found in the LICENSE file.

4

Allen Li

ee36ab8

2017-07-07 15:46:40 -0700

[diff] [blame]

5

import abc

Fang Deng

2015-01-02 14:51:49 -0800

[diff] [blame]

6

import datetime

7

import difflib

Allen Li

2017-02-28 18:43:24 -0800

[diff] [blame]

8

import functools

Fang Deng

2015-01-02 14:51:49 -0800

[diff] [blame]

import hashlib

import logging

import operator

import os

import re

Fang Deng

2015-01-02 14:51:49 -0800

[diff] [blame]

14

import sys

Allen Li

2017-02-28 18:43:24 -0800

[diff] [blame]

15

import warnings

Chris Masone

2012-08-15 14:25:53 -0700

[diff] [blame]

import common

J. Richard Barnette

2014-04-02 10:27:33 -0700

[diff] [blame]

19

from autotest_lib.frontend.afe.json_rpc import proxy

Fang Deng

2014-03-17 15:19:46 -0700

[diff] [blame]

20

from autotest_lib.client.common_lib import enum

Dan Shi

dfea368

2014-08-10 23:38:40 -0700

[diff] [blame]

21

from autotest_lib.client.common_lib import error

Simran Basi

5ace6f2

2016-01-06 17:30:44 -0800

[diff] [blame]

22

from autotest_lib.client.common_lib import global_config

Alex Miller

7d658cf

2013-09-04 16:00:35 -0700

[diff] [blame]

23

from autotest_lib.client.common_lib import priorities

Dan Shi

dfea368

2014-08-10 23:38:40 -0700

[diff] [blame]

24

from autotest_lib.client.common_lib import time_utils

25

from autotest_lib.client.common_lib import utils

Xixuan Wu

eb1acc4

2017-11-22 15:46:03 -0800

[diff] [blame]

26

from autotest_lib.frontend.afe import model_attributes

Fang Deng

2014-03-17 15:19:46 -0700

[diff] [blame]

27

from autotest_lib.frontend.afe.json_rpc import proxy

Dan Shi

2014-10-10 13:38:51 -0700

[diff] [blame]

28

from autotest_lib.server.cros import provision

Chris Masone

2012-08-15 14:25:53 -0700

[diff] [blame]

29

from autotest_lib.server.cros.dynamic_suite import constants

30

from autotest_lib.server.cros.dynamic_suite import control_file_getter

31

from autotest_lib.server.cros.dynamic_suite import frontend_wrappers

Alex Miller

2012-12-19 13:38:31 -0800

[diff] [blame]

32

from autotest_lib.server.cros.dynamic_suite import job_status

Xixuan Wu

2018-04-25 17:04:51 -0700

[diff] [blame]

33

from autotest_lib.server.cros.dynamic_suite import suite_common

J. Richard Barnette

e7b98bb

2013-08-21 16:34:16 -0700

[diff] [blame]

34

from autotest_lib.server.cros.dynamic_suite import tools

35

from autotest_lib.server.cros.dynamic_suite.job_status import Status

Chris Masone

2012-08-15 14:25:53 -0700

[diff] [blame]

36

Shuqian Zhao

ab46881

2015-04-08 14:40:38 -0700

[diff] [blame]

37

try:

38

from chromite.lib import boolparse_lib

39

from chromite.lib import cros_logging as logging

40

except ImportError:

41

print 'Unable to import chromite.'

42

print 'This script must be either:'

43

print ' - Be run in the chroot.'

44

print ' - (not yet supported) be run after running '

45

print ' ../utils/build_externals.py'

Fang Deng

2014-03-17 15:19:46 -0700

[diff] [blame]

46

Shuqian Zhao

490f78f

2016-01-20 13:18:40 -0800

[diff] [blame]

47

# Names of suites collected under _FILE_BUG_SUITES.
# NOTE(review): presumably the suites for which bugs are filed on test
# failure -- confirm against the code that reads this list.
_FILE_BUG_SUITES = ['au', 'bvt', 'bvt-cq', 'bvt-inline', 'paygen_au_beta',
                    'paygen_au_canary', 'paygen_au_dev', 'paygen_au_stable',
                    'sanity', 'push_to_prod']
# Value of the 'drone_installation_directory' option in the SCHEDULER section
# of the global config, read once at import time.
_AUTOTEST_DIR = global_config.global_config.get_config_value(
        'SCHEDULER', 'drone_installation_directory')

Xixuan Wu

2018-04-30 17:17:10 -0700

[diff] [blame]

52

Shuqian Zhao

e33ba4a

2015-09-11 18:51:43 -0700

[diff] [blame]

53

Fang Deng

2014-03-17 15:19:46 -0700

[diff] [blame]

54

class RetryHandler(object):
    """Maintain retry information.

    @var _retry_map: A dictionary that stores retry history.
            The key is afe job id. The value is a dictionary.
            {job_id: {'state':RetryHandler.States, 'retry_max':int}}
            - state:
                The retry state of a job.
                NOT_ATTEMPTED:
                    We haven't done anything about the job.
                ATTEMPTED:
                    We've made an attempt to schedule a retry job. The
                    scheduling may or may not be successful, e.g.
                    it might encounter an rpc error. Note failure
                    in scheduling a retry is different from a retry job
                    failure.
                    For each job, we only attempt to schedule a retry once.
                    For example, assume we have a test with JOB_RETRIES=5 and
                    its second retry job failed. When we attempt to create
                    a third retry job to retry the second, we hit an rpc
                    error. In such case, we will give up on all following
                    retries.
                RETRIED:
                    A retry job has already been successfully
                    scheduled.
            - retry_max:
                The maximum of times the job can still
                be retried, taking into account retries
                that have occurred.
    @var _retry_level: A retry might be triggered only if the result
            is worse than the level.
    @var _max_retries: Maximum retry limit at suite level.
            Regardless how many times each individual test
            has been retried, the total number of retries happening in
            the suite can't exceed _max_retries.
    """

    States = enum.Enum('NOT_ATTEMPTED', 'ATTEMPTED', 'RETRIED',
                       start_value=1, step=1)


    def __init__(self, initial_jobs_to_tests, retry_level='WARN',
                 max_retries=None):
        """Initialize RetryHandler.

        @param initial_jobs_to_tests: A dictionary that maps a job id to
                a ControlData object. This dictionary should contain
                jobs that are originally scheduled by the suite.
        @param retry_level: A retry might be triggered only if the result is
                worse than the level.
        @param max_retries: Integer, maximum total retries allowed
                for the suite. Default to None, no max.
        """
        self._retry_map = {}
        self._retry_level = retry_level
        if max_retries is None:
            # "No max" is modelled as a huge budget.  sys.maxint only exists
            # on Python 2; fall back to sys.maxsize elsewhere.
            max_retries = getattr(sys, 'maxint', sys.maxsize)
        self._max_retries = max_retries
        for job_id, test in initial_jobs_to_tests.items():
            if test.job_retries > 0:
                self._add_job(new_job_id=job_id,
                              retry_max=test.job_retries)
            else:
                # Jobs without retries never get a retry map entry.
                logging.debug("Test %s has no retries", test.name)


    def _add_job(self, new_job_id, retry_max):
        """Add a newly-created job to the retry map.

        @param new_job_id: The afe_job_id of a newly created job.
        @param retry_max: The maximum of times that we could retry
                          the test if the job fails.

        @raises ValueError if new_job_id is already in retry map.

        """
        if new_job_id in self._retry_map:
            raise ValueError('add_job called when job is already in retry map.')

        self._retry_map[new_job_id] = {
                'state': self.States.NOT_ATTEMPTED,
                'retry_max': retry_max}


    def _suite_max_reached(self):
        """Return whether maximum retry limit for a suite has been reached."""
        return self._max_retries <= 0


    def add_retry(self, old_job_id, new_job_id):
        """Record a retry.

        Update retry map with the retry information: the old job is marked
        RETRIED, the new job is added with one fewer remaining retry, and the
        suite-level retry budget is decremented.

        @param old_job_id: The afe_job_id of the job that is retried.
        @param new_job_id: The afe_job_id of the retry job.

        @raises KeyError if old_job_id isn't in the retry map.
        @raises ValueError if we have already retried or made an attempt
                to retry the old job.

        """
        old_record = self._retry_map[old_job_id]
        if old_record['state'] != self.States.NOT_ATTEMPTED:
            raise ValueError(
                    'We have already retried or attempted to retry job %d' %
                    old_job_id)
        old_record['state'] = self.States.RETRIED
        self._add_job(new_job_id=new_job_id,
                      retry_max=old_record['retry_max'] - 1)
        self._max_retries -= 1


    def set_attempted(self, job_id):
        """Set the state of the job to ATTEMPTED.

        @param job_id: afe_job_id of a job.

        @raises KeyError if job_id isn't in the retry map.
        @raises ValueError if the current state is not NOT_ATTEMPTED.

        """
        current_state = self._retry_map[job_id]['state']
        if current_state != self.States.NOT_ATTEMPTED:
            # We are supposed to retry or attempt to retry each job
            # only once. Raise an error if this is not the case.
            raise ValueError('Unexpected state transition: %s -> %s' %
                             (self.States.get_string(current_state),
                              self.States.get_string(self.States.ATTEMPTED)))
        else:
            self._retry_map[job_id]['state'] = self.States.ATTEMPTED


    def has_following_retry(self, result):
        """Check whether there will be a following retry.

        We have the following cases for a given job id (result.id),
            - no retry map entry -> retry not required, no following retry
            - has retry map entry:
                - already retried -> has following retry
                - has not retried
                    (this branch can be handled by checking
                     should_retry(result))
                    - retry_max == 0 --> the last retry job, no more retry
                    - retry_max > 0
                       - attempted, but has failed in scheduling a
                         following retry due to rpc error --> no more retry
                       - has not attempted --> has following retry if test
                         failed.

        @param result: A result, encapsulating the status of the job.

        @returns: True, if there will be a following retry.
                  False otherwise.

        """
        return (result.test_executed
                and result.id in self._retry_map
                and (self._retry_map[result.id]['state'] == self.States.RETRIED
                     or self._should_retry(result)))


    def _should_retry(self, result):
        """Check whether we should retry a job based on its result.

        We will retry the job that corresponds to the result
        when all of the following are true.
        a) The test was actually executed, meaning that if
           a job was aborted before it could ever reach the state
           of 'Running', the job will not be retried.
        b) The result is worse than |self._retry_level| which
           defaults to 'WARN'.
        c) The test requires retry, i.e. the job has an entry in the retry map.
        d) We haven't made any retry attempt yet, i.e. state == NOT_ATTEMPTED
           Note that if a test has JOB_RETRIES=5, and the second time
           it was retried it hit an rpc error, we will give up on
           all following retries.
        e) The job has not reached its retry max, i.e. retry_max > 0
        In addition, the suite-level retry limit must not have been reached.

        @param result: A result, encapsulating the status of the job.

        @returns: True if we should retry the job.

        """
        return (
            result.test_executed
            and result.id in self._retry_map
            and not self._suite_max_reached()
            and result.is_worse_than(
                job_status.Status(self._retry_level, '', 'reason'))
            and self._retry_map[result.id]['state'] == self.States.NOT_ATTEMPTED
            and self._retry_map[result.id]['retry_max'] > 0
        )


    def _should_retry_local_job(self, job_id):
        """Check whether we should retry a job based on information available
        for a local job without a Result object.

        We will retry the job that corresponds to the result
        when all of the following are true.
        a) The test requires retry, i.e. the job has an entry in the retry map.
        b) We haven't made any retry attempt yet for this job, i.e.
           state == NOT_ATTEMPTED
           If the job is aborted, we will give up on all following retries,
           regardless of max_retries.
        c) The job has not reached its retry max, i.e. retry_max > 0
        In addition, the suite-level retry limit must not have been reached.

        @param job_id: the id for the job, to look up relevant information.

        @returns: True if we should retry the job.

        """
        if self._suite_max_reached():
            logging.debug('suite max_retries reached, not retrying.')
            return False
        if job_id not in self._retry_map:
            logging.debug('job_id not in retry map, not retrying.')
            return False
        state = self._retry_map[job_id]['state']
        if state != self.States.NOT_ATTEMPTED:
            # Log the state's name rather than its raw integer value so the
            # message is readable.
            logging.debug("job state was %s not 'Not Attempted', not retrying",
                          self.States.get_string(state))
            return False
        if self._retry_map[job_id]['retry_max'] <= 0:
            logging.debug('test-level retries exhausted, not retrying')
            return False
        return True


    def job_present(self, job_id):
        """Check whether a job id is present in the retry map.

        @param job_id: afe_job_id of a job.

        @returns: True if the job is present, False if not.
        """
        # Every entry stored by _add_job is a non-empty dict, so membership
        # is equivalent to the truthiness of the stored record.
        return job_id in self._retry_map


    def get_retry_max(self, job_id):
        """Get the maximum times the job can still be retried.

        @param job_id: afe_job_id of a job.

        @returns: An int, representing the maximum times the job can still be
                  retried.
        @raises KeyError if job_id isn't in the retry map.

        """
        return self._retry_map[job_id]['retry_max']

299

300

Allen Li

da198fd

2017-03-29 17:22:13 -0700

[diff] [blame]

301

class _SuiteChildJobCreator(object):
    """Create test jobs for a suite."""

    def __init__(
            self,
            tag,
            builds,
            board,
            afe=None,
            max_runtime_mins=24*60,
            timeout_mins=24*60,
            suite_job_id=None,
            ignore_deps=False,
            extra_deps=(),
            priority=priorities.Priority.DEFAULT,
            offload_failures_only=False,
            test_source_build=None,
            job_keyvals=None,
    ):
        """
        Constructor

        @param tag: a string with which to tag jobs run in this suite.
        @param builds: the builds on which we're running this suite.
        @param board: the board on which we're running this suite.
        @param afe: an instance of AFE as defined in server/frontend.py.
        @param max_runtime_mins: Maximum suite runtime, in minutes.
        @param timeout_mins: Maximum job lifetime, in minutes.
        @param suite_job_id: Job id that will act as parent id to all sub jobs.
                             Default: None
        @param ignore_deps: True if jobs should ignore the DEPENDENCIES
                            attribute and skip applying of dependency labels.
                            (Default:False)
        @param extra_deps: A list of strings which are the extra DEPENDENCIES
                           to add to each test being scheduled.
        @param priority: Integer priority level.  Higher is more important.
        @param offload_failures_only: Only enable gs_offloading for failed
                                      jobs.
        @param test_source_build: Build that contains the server-side test code.
        @param job_keyvals: General job keyvals to be inserted into keyval file,
                            which will be used by tko/parse later.
        """
        self._tag = tag
        self._builds = builds
        self._board = board
        # Fall back to a retrying AFE client when the caller supplies none.
        self._afe = afe or frontend_wrappers.RetryingAFE(timeout_min=30,
                                                         delay_sec=10,
                                                         debug=False)
        self._max_runtime_mins = max_runtime_mins
        self._timeout_mins = timeout_mins
        self._suite_job_id = suite_job_id
        self._ignore_deps = ignore_deps
        # Stored as a tuple so the caller's sequence is never shared.
        self._extra_deps = tuple(extra_deps)
        self._priority = priority
        self._offload_failures_only = offload_failures_only
        self._test_source_build = test_source_build
        self._job_keyvals = job_keyvals


    @property
    def cros_build(self):
        """Return the CrOS build or the first build in the builds dict.

        @raises IndexError if the builds dict is empty.
        """
        # TODO(ayatane): Note that the builds dict isn't ordered.  I'm not
        # sure what the implications of this are, but it's probably not a
        # good thing.
        builds = self._builds
        if provision.CROS_VERSION_PREFIX in builds:
            return builds[provision.CROS_VERSION_PREFIX]
        # list(...) keeps this working where dict.values() returns a view
        # (Python 3) and still raises IndexError on an empty dict.
        return list(builds.values())[0]


    def create_job(self, test, retry_for=None):
        """
        Thin wrapper around frontend.AFE.create_job().

        @param test: ControlData object for a test to run.
        @param retry_for: If the to-be-created job is a retry for an
                          old job, the afe_job_id of the old job will
                          be passed in as |retry_for|, which will be
                          recorded in the new job's keyvals.
        @returns: A frontend.Job object with an added test_name member.
                  test_name is used to preserve the higher level TEST_NAME
                  name of the job.
        """
        # For a system running multiple suites which share tests, the priority
        # overridden may lead to unexpected scheduling order that adds extra
        # provision jobs.
        test_priority = self._priority
        if utils.is_moblab():
            test_priority = max(self._priority, test.priority)

        # Tests flagged as fast skip the pre-test reboot and the reset step.
        reboot_before = (model_attributes.RebootBefore.NEVER if test.fast
                         else None)

        test_obj = self._afe.create_job(
            control_file=test.text,
            name=tools.create_job_name(
                    self._test_source_build or self.cros_build,
                    self._tag,
                    test.name),
            control_type=test.test_type.capitalize(),
            meta_hosts=[self._board]*test.sync_count,
            dependencies=self._create_job_deps(test),
            keyvals=self._create_keyvals_for_test_job(test, retry_for),
            max_runtime_mins=self._max_runtime_mins,
            timeout_mins=self._timeout_mins,
            parent_job_id=self._suite_job_id,
            test_retry=test.retries,
            reboot_before=reboot_before,
            run_reset=not test.fast,
            priority=test_priority,
            synch_count=test.sync_count,
            require_ssp=test.require_ssp)

        test_obj.test_name = test.name
        return test_obj


    def _create_job_deps(self, test):
        """Create job deps list for a test job.

        @param test: ControlData object for a test to run.

        @returns: A list of dependency strings.
        """
        if self._ignore_deps:
            job_deps = []
        else:
            job_deps = list(test.dependencies)
        # NOTE(review): extra deps are appended in both branches; nesting was
        # reconstructed from a flattened listing -- confirm against upstream.
        job_deps.extend(self._extra_deps)
        return job_deps


    def _create_keyvals_for_test_job(self, test, retry_for=None):
        """Create keyvals dict for creating a test job.

        @param test: ControlData object for a test to run.
        @param retry_for: If the to-be-created job is a retry for an
                          old job, the afe_job_id of the old job will
                          be passed in as |retry_for|, which will be
                          recorded in the new job's keyvals.
        @returns: A keyvals dict for creating the test job.
        """
        keyvals = {
            constants.JOB_BUILD_KEY: self.cros_build,
            constants.JOB_SUITE_KEY: self._tag,
            constants.JOB_EXPERIMENTAL_KEY: test.experimental,
            constants.JOB_BUILDS_KEY: self._builds
        }
        # test_source_build is saved to job_keyvals so scheduler can retrieve
        # the build name from database when compiling autoserv commandline.
        # This avoid a database change to add a new field in afe_jobs.
        #
        # Only add `test_source_build` to job keyvals if the build is different
        # from the CrOS build or the job uses more than one build, e.g., both
        # firmware and CrOS will be updated in the dut.
        # This is for backwards compatibility, so the update Autotest code can
        # compile an autoserv command line to run in a SSP container using
        # previous builds.
        if (self._test_source_build and
            (self.cros_build != self._test_source_build or
             len(self._builds) > 1)):
            keyvals[constants.JOB_TEST_SOURCE_BUILD_KEY] = \
                    self._test_source_build
            # NOTE(review): this loop is placed inside the test_source_build
            # condition; nesting was reconstructed from a flattened listing --
            # confirm against upstream.
            # items() (not iteritems()) so this also runs on Python 3.
            for prefix, build in self._builds.items():
                if prefix == provision.FW_RW_VERSION_PREFIX:
                    keyvals[constants.FWRW_BUILD] = build
                elif prefix == provision.FW_RO_VERSION_PREFIX:
                    keyvals[constants.FWRO_BUILD] = build
        # Add suite job id to keyvals so tko parser can read it from keyval
        # file.
        if self._suite_job_id:
            keyvals[constants.PARENT_JOB_ID] = self._suite_job_id
        # We drop the old job's id in the new job's keyval file so that
        # later our tko parser can figure out the retry relationship and
        # invalidate the results of the old job in tko database.
        if retry_for:
            keyvals[constants.RETRY_ORIGINAL_JOB_ID] = retry_for
        if self._offload_failures_only:
            keyvals[constants.JOB_OFFLOAD_FAILURES_KEY] = True
        # Only the keys listed in constants.INHERITED_KEYVALS are copied from
        # the general job keyvals.
        if self._job_keyvals:
            for key in constants.INHERITED_KEYVALS:
                if key in self._job_keyvals:
                    keyvals[key] = self._job_keyvals[key]
        return keyvals

Allen Li

2017-03-10 16:11:53 -0800

[diff] [blame]

484

class _ControlFileRetriever(object):
    """Retrieves control files.

    This returns control data instances, unlike control file getters
    which simply return the control file text contents.
    """

    def __init__(self, cf_getter, forgiving_parser=True, run_prod_code=False,
                 test_args=None):
        """Initialize instance.

        @param cf_getter: a control_file_getter.ControlFileGetter used to list
               and fetch the content of control files
        @param forgiving_parser: If False, will raise ControlVariableExceptions
                                 if any are encountered when parsing control
                                 files. Note that this can raise an exception
                                 for syntax errors in unrelated files, because
                                 we parse them before applying the predicate.
        @param run_prod_code: If true, the retrieved tests will run the test
                              code that lives in prod aka the test code
                              currently on the lab servers by disabling
                              SSP for the discovered tests.
        @param test_args: A dict of args to be seeded in test control file under
                          the name |args_dict|.
        """
        self._cf_getter = cf_getter
        self._forgiving_parser = forgiving_parser
        self._run_prod_code = run_prod_code
        self._test_args = test_args


    def retrieve_for_test(self, test_name):
        """Retrieve a test's control data.

        This ignores forgiving_parser because we cannot return a
        forgiving value.

        @param test_name: Name of test to retrieve.

        @raises ControlVariableException: There is a syntax error in a
                                          control file.

        @returns a ControlData object
        """
        return suite_common.retrieve_control_data_for_test(
                self._cf_getter, test_name)


    def retrieve_for_suite(self, suite_name=''):
        """Scan through all tests and find all tests.

        @param suite_name: If specified, this method will attempt to restrain
                           the search space to just this suite's control files.

        @raises ControlVariableException: If forgiving_parser is False and there
                                          is a syntax error in a control file.

        @returns a dictionary of ControlData objects that based on given
                 parameters.
        """
        tests = suite_common.retrieve_for_suite(
                self._cf_getter, suite_name, self._forgiving_parser,
                self._test_args)
        if self._run_prod_code:
            # Running prod code means SSP is disabled on every retrieved test.
            # values() (not itervalues()) so this also runs on Python 3.
            for test in tests.values():
                test.require_ssp = False

        return tests

Allen Li

1abded5

2017-03-10 16:37:57 -0800

[diff] [blame]

552

553

Allen Li

122cd09

2017-03-09 15:56:46 -0800

[diff] [blame]

554

def list_all_suites(build, devserver, cf_getter=None):
    """
    Parses all ControlData objects with a SUITE tag and extracts all
    defined suite names.

    @param build: the build on which we're running this suite.
    @param devserver: the devserver which contains the build.
    @param cf_getter: control_file_getter.ControlFileGetter. Defaults to
                      using DevServerGetter.

    @return list of suites
    """
    if cf_getter is None:
        cf_getter = _create_ds_getter(build, devserver)

    suites = set()
    # Accept every test: the suite names are gathered from all of them.
    predicate = lambda t: True
    for test in find_and_parse_tests(cf_getter, predicate):
        suites.update(test.suite_tag_parts)
    return list(suites)

Allen Li

2017-03-09 15:52:33 -0800

[diff] [blame]

576

def test_file_similarity_predicate(test_file_pattern):

577

"""Returns predicate that gets the similarity based on a test's file

578

name pattern.

579

580

Builds a predicate that takes in a parsed control file (a ControlData)

581

and returns a tuple of (file path, ratio), where ratio is the

582

similarity between the test file name and the given test_file_pattern.

583

584

@param test_file_pattern: regular expression (string) to match against

585

control file names.

586

@return a callable that takes a ControlData and and returns a tuple of

587

(file path, ratio), where ratio is the similarity between the

588

test file name and the given test_file_pattern.

589

"""

590

return lambda t: ((None, 0) if not hasattr(t, 'path') else

591

(t.path, difflib.SequenceMatcher(a=t.path,

592

b=test_file_pattern).ratio()))

593

594

Allen Li

b5b4a7a

2017-03-09 15:50:09 -0800

[diff] [blame]

595

def test_name_similarity_predicate(test_name):

596

"""Returns predicate that matched based on a test's name.

597

598

Builds a predicate that takes in a parsed control file (a ControlData)

599

and returns a tuple of (test name, ratio), where ratio is the similarity

600

between the test name and the given test_name.

601

602

@param test_name: the test name to base the predicate on.

603

@return a callable that takes a ControlData and returns a tuple of

604

(test name, ratio), where ratio is the similarity between the

605

test name and the given test_name.

606

"""

607

return lambda t: ((None, 0) if not hasattr(t, 'name') else

608

(t.name,

609

difflib.SequenceMatcher(a=t.name, b=test_name).ratio()))

610

611

Allen Li

e37d6ba

2017-03-09 15:49:25 -0800

[diff] [blame]

612

def matches_attribute_expression_predicate(test_attr_boolstr):
    """Returns predicate that matches based on boolean expression of
    attributes.

    Builds a predicate that takes in a parsed control file (a ControlData)
    and returns True if the test attributes satisfy the given attribute
    boolean expression.

    @param test_attr_boolstr: boolean expression of the attributes to be
                              test, like 'system:all and interval:daily'.

    @return a callable that takes a ControlData and returns True if the test
            attributes satisfy the given boolean expression.
    """
    # Evaluation is delegated to chromite's boolparse_lib for each candidate
    # test's attributes.
    return lambda t: boolparse_lib.BoolstrResult(
        test_attr_boolstr, t.attributes)

628

629

Allen Li

f29b48a

2017-03-09 15:48:41 -0800

[diff] [blame]

630

def test_file_matches_pattern_predicate(test_file_pattern):

631

"""Returns predicate that matches based on a test's file name pattern.

632

633

Builds a predicate that takes in a parsed control file (a ControlData)

634

and returns True if the test's control file name matches the given

635

regular expression.

636

637

@param test_file_pattern: regular expression (string) to match against

638

control file names.

639

@return a callable that takes a ControlData and and returns

640

True if control file name matches the pattern.

641

"""

642

return lambda t: hasattr(t, 'path') and re.match(test_file_pattern,

t.path)

Allen Li

2017-03-09 15:47:25 -0800

[diff] [blame]

646

def test_name_matches_pattern_predicate(test_name_pattern):

647

"""Returns predicate that matches based on a test's name pattern.

648

649

Builds a predicate that takes in a parsed control file (a ControlData)

650

and returns True if the test name matches the given regular expression.

651

652

@param test_name_pattern: regular expression (string) to match against

653

test names.

654

@return a callable that takes a ControlData and returns

655

True if the name fields matches the pattern.

656

"""

657

return lambda t: hasattr(t, 'name') and re.match(test_name_pattern,

t.name)

Allen Li

2017-03-09 15:46:32 -0800

[diff] [blame]

661

def test_name_equals_predicate(test_name):

662

"""Returns predicate that matched based on a test's name.

663

664

Builds a predicate that takes in a parsed control file (a ControlData)

665

and returns True if the test name is equal to |test_name|.

666

667

@param test_name: the test name to base the predicate on.

668

@return a callable that takes a ControlData and looks for |test_name|

669

in that ControlData's name.

670

"""

671

return lambda t: hasattr(t, 'name') and test_name == t.name

672

673

Allen Li

6e2fa4f

2017-03-09 15:45:43 -0800

[diff] [blame]

674

def name_in_tag_similarity_predicate(name):
    """Build a scorer comparing a control file's suites with a suite name.

    The returned callable accepts a parsed control file (a ControlData) and
    produces a list of (suite name, ratio) tuples, one per entry of the
    control file's `suite_tag_parts`, where ratio is the
    difflib.SequenceMatcher similarity between that suite and |name|.
    A control file listing no suites yields [(None, 0)].

    @param name: the suite name to base the predicate on.
    @return a callable that takes a ControlData and returns a list of
            (suite name, ratio) tuples.
    """
    def _score_suites(control_data):
        """Score every suite listed by |control_data| against |name|."""
        scored = []
        for suite in control_data.suite_tag_parts:
            matcher = difflib.SequenceMatcher(a=suite, b=name)
            scored.append((suite, matcher.ratio()))
        if not scored:
            return [(None, 0)]
        return scored

    return _score_suites

692

693

Allen Li

398ddbd

2017-03-09 15:44:25 -0800

[diff] [blame]

694

def name_in_tag_predicate(name):
    """Build a predicate checking whether a control file belongs to a suite.

    The returned callable accepts a parsed control file (a ControlData) and
    returns True when |name| is one of its `suite_tag_parts`.

    @param name: the suite name to base the predicate on.
    @return a callable that takes a ControlData and looks for |name| among
            that ControlData object's suite tag parts.
    """
    def _has_tag(control_data):
        """Return whether |name| is listed in |control_data|'s suites."""
        return name in control_data.suite_tag_parts

    return _has_tag

705

706

Allen Li

a640d6d

2017-03-09 15:41:35 -0800

[diff] [blame]

707

def create_fs_getter(autotest_dir):
    """Create a control file getter backed by the local file system.

    @param autotest_dir: the place to find autotests.
    @return a FileSystemGetter instance that looks under |autotest_dir|.
    """
    # Currently hard-coded places to look for tests.
    search_roots = []
    for relative_path in ('server/site_tests', 'client/site_tests',
                          'server/tests', 'client/tests'):
        search_roots.append(os.path.join(autotest_dir, relative_path))
    return control_file_getter.FileSystemGetter(search_roots)

717

718

Allen Li

0f91587

2017-02-28 18:51:04 -0800

[diff] [blame]

719

def _create_ds_getter(build, devserver):
    """Create a control file getter backed by a devserver.

    @param build: the build on which we're running this suite.
    @param devserver: the devserver which contains the build.
    @return a DevServerGetter instance constructed from |build| and
            |devserver|.
    """
    return control_file_getter.DevServerGetter(build, devserver)

726

727

Allen Li

3adae95

2017-03-10 17:18:12 -0800

[diff] [blame]

728

def _non_experimental_tests_predicate(test_data):

729

"""Test predicate for non-experimental tests."""

730

return not test_data.experimental

731

732

Allen Li

0b1fa38

2017-02-28 18:47:16 -0800

[diff] [blame]

733

def find_and_parse_tests(cf_getter, predicate, suite_name='',
                         add_experimental=False, forgiving_parser=True,
                         run_prod_code=False, test_args=None):
    """Scan through all tests and return the ones that are eligible.

    Control files are retrieved for |suite_name| through a
    _ControlFileRetriever built on |cf_getter|, then filtered with
    |predicate| by suite_common.filter_tests. Unless |add_experimental| is
    set, the predicate is combined with one that rejects experimental tests.

    @param cf_getter: a control_file_getter.ControlFileGetter used to list
           and fetch the content of control files
    @param predicate: a function that should return True when run over a
           ControlData representation of a control file that should be in
           this Suite.
    @param suite_name: If specified, this method will attempt to restrain
                       the search space to just this suite's control files.
    @param add_experimental: add tests with experimental attribute set.
    @param forgiving_parser: If False, will raise ControlVariableExceptions
                             if any are encountered when parsing control
                             files. Note that this can raise an exception
                             for syntax errors in unrelated files, because
                             we parse them before applying the predicate.
    @param run_prod_code: If true, the suite will run the test code that
                          lives in prod aka the test code currently on the
                          lab servers by disabling SSP for the discovered
                          tests.
    @param test_args: A dict of args to be seeded in test control file.

    @raises ControlVariableException: If forgiving_parser is False and there
                                      is a syntax error in a control file.

    @return the result of suite_common.filter_tests: the ControlData objects
            that should be run. (Ordering is decided by that helper; it was
            previously documented as slowest TIME first -- confirm there.)
    """
    logging.debug('Getting control file list for suite: %s', suite_name)
    retriever_options = {
        'forgiving_parser': forgiving_parser,
        'run_prod_code': run_prod_code,
        'test_args': test_args,
    }
    retriever = _ControlFileRetriever(cf_getter, **retriever_options)
    candidates = retriever.retrieve_for_suite(suite_name)

    effective_predicate = predicate
    if not add_experimental:
        effective_predicate = _ComposedPredicate(
                [predicate, _non_experimental_tests_predicate])
    return suite_common.filter_tests(candidates, effective_predicate)

Allen Li

0b1fa38

2017-02-28 18:47:16 -0800

[diff] [blame]

779

780

Allen Li

da01219

2017-02-28 18:37:52 -0800

[diff] [blame]

781

def find_possible_tests(cf_getter, predicate, suite_name='', count=10):
    """Scan through all tests and suggest the closest matching names.

    Control files are retrieved for |suite_name| through |cf_getter|, scored
    with |predicate|, and the |count| best-scoring names are returned.

    @param cf_getter: a control_file_getter.ControlFileGetter used to list
           and fetch the content of control files
    @param predicate: a function that should return a tuple of (name, ratio)
           (or a list of such tuples) when run over a ControlData
           representation of a control file. `name` is the key to be
           compared, e.g., a suite name or test name. `ratio` is a value
           between [0,1] indicating the similarity of `name` and the value
           to be compared.
    @param suite_name: If specified, this method will attempt to restrain
                       the search space to just this suite's control files.
    @param count: Number of suggestions to return, default to 10.

    @return list of at most |count| names similar to the given one, sorted
            by match ratio, best first.
    """
    logging.debug('Getting control file list for suite: %s', suite_name)
    tests = _ControlFileRetriever(cf_getter).retrieve_for_suite(suite_name)
    logging.debug('Parsed %s control files.', len(tests))
    similarities = {}
    for test in tests.values():
        ratios = predicate(test)
        # Some predicates may return a list of tuples, e.g.,
        # name_in_tag_similarity_predicate. Convert all returns to a list.
        if not isinstance(ratios, list):
            ratios = [ratios]
        for name, ratio in ratios:
            # The similarity predicates use (None, 0) as a placeholder for
            # control files lacking the compared attribute; None is not a
            # real name and must never be offered as a suggestion.
            if name is None:
                continue
            # Keep the best score when the same name is seen more than once.
            similarities[name] = max(ratio, similarities.get(name, ratio))
    ranked = sorted(similarities.items(), key=operator.itemgetter(1),
                    reverse=True)
    return [name for name, _ in ranked[:count]]

818

819

Allen Li

2017-02-28 18:43:24 -0800

[diff] [blame]

820

def _deprecated_suite_method(func):

821

"""Decorator for deprecated Suite static methods.

822

823

TODO(ayatane): This is used to decorate functions that are called as

824

static methods on Suite.

825

"""

826

@functools.wraps(func)

827

def wrapper(*args, **kwargs):

Shuhei Takahashi

2017-06-14 20:02:26 +0900

[diff] [blame]

828

"""Wraps |func| for warning."""

Brian Norris

32add5d

2017-06-19 11:09:54 -0700

[diff] [blame]

829

warnings.warn('Calling method "%s" from Suite is deprecated' %

830

func.__name__)

Allen Li

2017-02-28 18:43:24 -0800

[diff] [blame]

831

return func(*args, **kwargs)

832

return staticmethod(wrapper)

833

834

Allen Li

2017-03-09 16:01:35 -0800

[diff] [blame]

835

class _BaseSuite(object):

Chris Masone

2012-08-15 14:25:53 -0700

[diff] [blame]

836

"""

837

A suite of tests, defined by some predicate over control file variables.

838

839

Given a place to search for control files and a predicate to match the desired

840

tests, can gather tests and fire off jobs to run them, and then wait for

841

results.

842

843

@var _predicate: a function that should return True when run over a

844

ControlData representation of a control file that should be in

845

this Suite.

846

@var _tag: a string with which to tag jobs run in this suite.

Dan Shi

2014-10-10 13:38:51 -0700

[diff] [blame]

847

@var _builds: the builds on which we're running this suite.

Chris Masone

2012-08-15 14:25:53 -0700

[diff] [blame]

848

@var _afe: an instance of AFE as defined in server/frontend.py.

849

@var _tko: an instance of TKO as defined in server/frontend.py.

850

@var _jobs: currently scheduled jobs, if any.

Fang Deng

2014-03-17 15:19:46 -0700

[diff] [blame]

851

@var _jobs_to_tests: a dictionary that maps job ids to tests represented

852

ControlData objects.

Fang Deng

2014-03-17 15:19:46 -0700

[diff] [blame]

853

@var _retry: a bool value indicating whether jobs should be retried on

854

failure.

855

@var _retry_handler: a RetryHandler object.

856

Chris Masone

2012-08-15 14:25:53 -0700

[diff] [blame]

857

"""

858

Dan Shi

2014-10-10 13:38:51 -0700

[diff] [blame]

859

Allen Li

2016-12-09 18:04:26 -0800

[diff] [blame]

860

def __init__(
        self,
        tests,
        tag,
        builds,
        board,
        afe=None,
        tko=None,
        pool=None,
        results_dir=None,
        max_runtime_mins=24*60,
        timeout_mins=24*60,
        file_bugs=False,
        suite_job_id=None,
        ignore_deps=False,
        extra_deps=None,
        priority=priorities.Priority.DEFAULT,
        wait_for_results=True,
        job_retry=False,
        max_retries=sys.maxint,
        offload_failures_only=False,
        test_source_build=None,
        job_keyvals=None,
        child_dependencies=(),
        result_reporter=None,
):
    """Initialize instance.

    @param tests: Iterable of tests to run.
    @param tag: a string with which to tag jobs run in this suite.
    @param builds: the builds on which we're running this suite.
    @param board: the board on which we're running this suite. It is also
                  added to the dependencies of the suite.
    @param afe: an instance of AFE as defined in server/frontend.py.
    @param tko: an instance of TKO as defined in server/frontend.py.
    @param pool: Specify the pool of machines to use for scheduling
                 purposes.
    @param results_dir: The directory where the job can write results to.
                        This must be set if you want job_id of sub-jobs
                        list in the job keyvals.
    @param max_runtime_mins: Maximum suite runtime, in minutes.
    @param timeout_mins: Maximum job lifetime, in minutes.
    @param file_bugs: Stored as _file_bugs; _should_report only reports
                      failures when this is true. (Default:False)
    @param suite_job_id: Job id that will act as parent id to all sub jobs.
                         Default: None
    @param ignore_deps: True if jobs should ignore the DEPENDENCIES
                        attribute and skip applying of dependency labels.
                        (Default:False)
    @param extra_deps: A list of strings which are the extra DEPENDENCIES
                       to add to each test being scheduled. The list is
                       copied, never modified in place.
    @param priority: Integer priority level.  Higher is more important.
    @param wait_for_results: Set to False to run the suite job without
                             waiting for test jobs to finish. Default is
                             True.
    @param job_retry: A bool value indicating whether jobs should be retried
                      on failure. If True, the field 'JOB_RETRIES' in
                      control files will be respected. If False, do not
                      retry.
    @param max_retries: Maximum retry limit at suite level.
                        Regardless how many times each individual test
                        has been retried, the total number of retries
                        happening in the suite can't exceed _max_retries.
                        Default to sys.maxint.
    @param offload_failures_only: Only enable gs_offloading for failed
                                  jobs.
    @param test_source_build: Build that contains the server-side test code.
    @param job_keyvals: General job keyvals to be inserted into keyval file,
                        which will be used by tko/parse later.
    @param child_dependencies: (optional) list of dependency strings
            to be added as dependencies to child jobs.
    @param result_reporter: A _ResultReporter instance to report results. If
            None, an _EmailReporter will be created.
    """

    self.tests = list(tests)
    self._tag = tag
    self._builds = builds
    self._results_dir = results_dir
    self._afe = afe or frontend_wrappers.RetryingAFE(timeout_min=30,
                                                     delay_sec=10,
                                                     debug=False)
    self._tko = tko or frontend_wrappers.RetryingTKO(timeout_min=30,
                                                     delay_sec=10,
                                                     debug=False)
    self._jobs = []
    self._jobs_to_tests = {}

    self._file_bugs = file_bugs
    self._suite_job_id = suite_job_id
    self._job_retry = job_retry
    self._max_retries = max_retries
    # RetryHandler to be initialized in schedule()
    self._retry_handler = None
    self.wait_for_results = wait_for_results
    self._job_keyvals = job_keyvals
    if result_reporter is None:
        self._result_reporter = _EmailReporter(self)
    else:
        self._result_reporter = result_reporter

    # Work on a private copy so the caller's list is never mutated by the
    # appends below.
    extra_deps = [] if extra_deps is None else list(extra_deps)
    extra_deps.append(board)
    if pool:
        extra_deps.append(pool)
    extra_deps.extend(child_dependencies)
    self._dependencies = tuple(extra_deps)

    # NOTE(review): the raw |afe| argument (possibly None) is forwarded
    # here rather than self._afe -- confirm this is intended.
    self._job_creator = _SuiteChildJobCreator(
        tag=tag,
        builds=builds,
        board=board,
        afe=afe,
        max_runtime_mins=max_runtime_mins,
        timeout_mins=timeout_mins,
        suite_job_id=suite_job_id,
        ignore_deps=ignore_deps,
        extra_deps=extra_deps,
        priority=priority,
        offload_failures_only=offload_failures_only,
        test_source_build=test_source_build,
        job_keyvals=job_keyvals,
    )

Allen Li

da198fd

2017-03-29 17:22:13 -0700

[diff] [blame]

981

Chris Masone

2012-08-15 14:25:53 -0700

[diff] [blame]

982

Allen Li

2017-07-07 16:50:38 -0700

[diff] [blame]

983

def _schedule_test(self, record, test, retry_for=None):
    """Schedule a single test and return the job.

    Schedule a single test by creating a job, and then update relevant
    data structures that are used to keep track of all running jobs.

    Emits a TEST_NA status log entry if it failed to schedule the test due
    to NoEligibleHostException or a non-existent board label.

    Returns a frontend.Job object if the test is successfully scheduled.
    If scheduling failed due to NoEligibleHostException or a non-existent
    board label, returns None.

    @param record: A callable to use for logging.
                   prototype: record(base_job.status_log_entry)
    @param test: ControlData for a test to run.
    @param retry_for: If we are scheduling a test to retry an
                      old job, the afe_job_id of the old job
                      will be passed in as |retry_for|.

    @raises proxy.ValidationError: if job creation failed validation for a
            reason other than a non-existent board label.
    @raises error.RPCException, proxy.JSONRPCException: re-raised from job
            creation, after marking the retried job as attempted.

    @returns: A frontend.Job object or None
    """
    msg = 'Scheduling %s' % test.name
    if retry_for:
        msg = msg + ', to retry afe job %d' % retry_for
    logging.debug(msg)
    begin_time_str = datetime.datetime.now().strftime(time_utils.TIME_FMT)
    try:
        job = self._job_creator.create_job(test, retry_for=retry_for)
    except (error.NoEligibleHostException, proxy.ValidationError) as e:
        # Only two exception types reach this handler, so anything that is
        # not a NoEligibleHostException is a ValidationError.
        if (not isinstance(e, error.NoEligibleHostException)
                and not _is_nonexistent_board_error(e)):
            # Bare raise keeps the original traceback (unlike `raise e`
            # under Python 2).
            raise
        # Treat a dependency on a non-existent board label the same as
        # a dependency on a board that exists, but for which there's no
        # hardware.
        logging.debug('%s not applicable for this board/pool. '
                      'Emitting TEST_NA.', test.name)
        Status('TEST_NA', test.name,
               'Skipping: test not supported on this board/pool.',
               begin_time_str=begin_time_str).record_all(record)
        return None
    except (error.RPCException, proxy.JSONRPCException):
        if retry_for:
            # Mark that we've attempted to retry the old job.
            logging.debug("RPC exception occurred")
            self._retry_handler.set_attempted(job_id=retry_for)
        raise
    else:
        self._jobs.append(job)
        self._jobs_to_tests[job.id] = test
        if retry_for:
            # A retry job was just created, record it.
            self._retry_handler.add_retry(
                    old_job_id=retry_for, new_job_id=job.id)
            retry_count = (test.job_retries -
                           self._retry_handler.get_retry_max(job.id))
            logging.debug('Job %d created to retry job %d. '
                          'Have retried for %d time(s)',
                          job.id, retry_for, retry_count)
        self._remember_job_keyval(job)
        return job

Fang Deng

2014-03-17 15:19:46 -0700

[diff] [blame]

1047

Allen Li

5b5642f

2017-05-17 17:02:56 -0700

[diff] [blame]

1048

def schedule(self, record):
    """
    Schedule jobs using |self._afe|.

    frontend.Job objects representing each scheduled job will be put in
    |self._jobs|.

    Any exception raised while scheduling is logged and recorded as a FAIL
    status for the suite instead of propagating; tests scheduled before the
    failure are still counted. Afterwards a RetryHandler is set up when job
    retries are enabled.

    @param record: A callable to use for logging.
                   prototype: record(base_job.status_log_entry)
    @returns: The number of tests that were scheduled.
    """
    scheduled = []
    logging.debug('Discovered %d tests.', len(self.tests))

    Status('INFO', 'Start %s' % self._tag).record_result(record)
    try:
        # Write job_keyvals into keyval file.
        if self._job_keyvals:
            utils.write_keyval(self._results_dir, self._job_keyvals)

        # TODO(crbug.com/730885): This is a hack to protect tests that are
        # not usually retried from getting hit by a provision error when run
        # as part of a suite. Remove this hack once provision is separated
        # out in its own suite.
        self._bump_up_test_retries(self.tests)
        # Kept as an explicit loop so names collected before a failure
        # survive into the return value.
        for test in self.tests:
            if self._schedule_test(record, test) is None:
                continue
            scheduled.append(test.name)

        # Write the num of scheduled tests and name of them to keyval file.
        logging.debug('Scheduled %d tests, writing the total to keyval.',
                      len(scheduled))
        summary_keyvals = self._make_scheduled_tests_keyvals(scheduled)
        utils.write_keyval(self._results_dir, summary_keyvals)
    except Exception:
        logging.exception('Exception while scheduling suite')
        Status('FAIL', self._tag,
               'Exception while scheduling suite').record_result(record)

    if not self._job_retry:
        logging.info("Will not retry jobs from suite %s.", self._tag)
    else:
        logging.debug("Initializing RetryHandler for suite %s.", self._tag)
        self._retry_handler = RetryHandler(
                initial_jobs_to_tests=self._jobs_to_tests,
                max_retries=self._max_retries)
        logging.debug("retry map created: %s ",
                      self._retry_handler._retry_map)
    return len(scheduled)

Aviv Keshet

e9170d9

2013-07-19 11:20:45 -0700

[diff] [blame]

1099

Alex Miller

2012-12-19 13:38:31 -0800

[diff] [blame]

1100

Prathmesh Prabhu

2017-06-08 10:44:52 -0700

[diff] [blame]

1101

def _bump_up_test_retries(self, tests):

1102

"""Bump up individual test retries to match suite retry options."""

1103

if not self._job_retry:

1104

return

1105

1106

for test in tests:

Ilja H. Friedel

dd98c2b

2017-08-31 23:55:40 -0700

[diff] [blame]

1107

# We do honor if a test insists on JOB_RETRIES = 0.

1108

if test.job_retries is None:

Prathmesh Prabhu

2017-06-08 10:44:52 -0700

[diff] [blame]

1109

logging.debug(

Ilja H. Friedel

dd98c2b

2017-08-31 23:55:40 -0700

[diff] [blame]

1110

'Test %s did not request retries, but suite requires '

Prathmesh Prabhu

2017-06-08 10:44:52 -0700

[diff] [blame]

1111

'retries. Bumping retries up to 1. '

1112

'(See crbug.com/730885)',

test.name)

test.job_retries = 1

Allen Li

2016-12-12 18:20:04 -0800

[diff] [blame]

1117

def _make_scheduled_tests_keyvals(self, scheduled_test_names):
    """Make a keyvals dict to write for scheduled test names.

    The dict holds the number of scheduled tests and the repr() of the
    list of their names, under the keys defined in constants.

    @param scheduled_test_names: A list of scheduled test name strings.

    @returns: A keyvals dict.
    """
    keyvals = {}
    keyvals[constants.SCHEDULED_TEST_COUNT_KEY] = len(scheduled_test_names)
    keyvals[constants.SCHEDULED_TEST_NAMES_KEY] = repr(scheduled_test_names)
    return keyvals

Allen Li

2016-12-29 15:12:39 -0800

[diff] [blame]

1130

def _should_report(self, result):

beeps

2013-05-30 11:34:14 -0700

[diff] [blame]

1131

"""

Shuqian Zhao

e33ba4a

2015-09-11 18:51:43 -0700

[diff] [blame]

1132

Returns True if this failure requires to be reported.

beeps

2013-05-30 11:34:14 -0700

[diff] [blame]

1133

1134

@param result: A result, encapsulating the status of the failed job.

Shuqian Zhao

e33ba4a

2015-09-11 18:51:43 -0700

[diff] [blame]

1135

@return: True if we should report this failure.

beeps

2013-05-30 11:34:14 -0700

[diff] [blame]

1136

"""

Alex Miller

fcc119b

2014-01-15 13:54:58 -0800

[diff] [blame]

1137

return (self._file_bugs and result.test_executed and

Fang Deng

d82c1c7

2014-07-29 10:43:01 -0700

[diff] [blame]

1138

not result.is_testna() and

beeps

32fa677

2014-01-28 13:19:53 -0800

[diff] [blame]

1139

result.is_worse_than(job_status.Status('GOOD', '', 'reason')))

beeps

2013-05-30 11:34:14 -0700

[diff] [blame]

1140

1141

Allen Li

cc75229

2017-01-03 12:44:39 -0800

[diff] [blame]

1142

def _has_retry(self, result):

1143

"""

1144

Return True if this result gets to retry.

1145

1146

@param result: A result, encapsulating the status of the failed job.

1147

@return: bool

1148

"""

1149

return (self._job_retry

1150

and self._retry_handler.has_following_retry(result))

1151

1152

Aviv Keshet

3e5ff4a

2017-08-04 14:11:37 -0700

[diff] [blame]

1153

def wait(self, record):
    """
    Polls for the job statuses, using |record| to print status when each
    completes.

    Jobs are looked up by parent job id when the suite job id is known,
    otherwise the jobs scheduled by this instance are used. Any exception
    is logged and recorded as a FAIL status instead of propagating.

    @param record: callable that records job status.
             prototype:
               record(base_job.status_log_entry)
    """
    waiter = job_status.JobResultWaiter(self._afe, self._tko)
    try:
        if not self._suite_job_id:
            logging.warning('Unknown suite_job_id, falling back to less '
                            'efficient results_generator.')
            jobs = self._jobs
        else:
            jobs = self._afe.get_jobs(parent_job_id=self._suite_job_id)
        waiter.add_jobs(jobs)
        for result in waiter.wait_for_results():
            self._handle_result(result=result, record=record, waiter=waiter)
            if self._finished_waiting():
                break
    except Exception:  # pylint: disable=W0703
        failure_reason = 'Exception waiting for results'
        logging.exception(failure_reason)
        Status('FAIL', self._tag, failure_reason).record_result(record)

1179

1180

Allen Li

2017-07-10 13:00:31 -0700

[diff] [blame]

1181

def _finished_waiting(self):
    """Tell whether the suite can stop waiting for child jobs.

    This implementation always answers False, so wait() keeps consuming
    results until the waiter runs out of them.

    @returns: False.
    """
    return False

Prathmesh Prabhu

2017-09-07 17:54:23 +0000

[diff] [blame]

1186

def _handle_result(self, result, record, waiter):
    """
    Process one test job result: record it, retry it if applicable, and
    report it unless a scheduled retry supersedes it.

    @param result: Status instance for job.
    @param record: callable that records job status.
             prototype:
               record(base_job.status_log_entry)
    @param waiter: JobResultsWaiter instance.

    @instance_param _result_reporter: _ResultReporter instance.
    """
    self._record_result(result, record)

    retry_scheduled = False
    wants_retry = (self._job_retry
                   and self._retry_handler._should_retry(result))
    if wants_retry:
        retry_scheduled = self._retry_result(result, record, waiter)

    # TODO (crbug.com/751428): If the suite times out before a retry could
    # finish, we would lose the chance to report errors from the original
    # job.
    superseded_by_retry = self._has_retry(result) and retry_scheduled
    if not superseded_by_retry and self._should_report(result):
        self._result_reporter.report(result)

Allen Li

2016-12-29 15:23:01 -0800

[diff] [blame]

1210

Allen Li

2017-07-10 12:53:06 -0700

[diff] [blame]

1211

def _record_result(self, result, record):
    """
    Write a test job result to the status log and remember its keyval.

    @param result: Status instance for job.
    @param record: callable that records job status.
             prototype:
               record(base_job.status_log_entry)
    """
    # Log the status entries first, then note the job in the suite keyvals.
    result.record_all(record)
    self._remember_job_keyval(result)

1222

1223

1224

def _retry_result(self, result, record, waiter):
    """
    Retry a test job result.

    Looks up the test that produced |result|, schedules it again as a
    retry of that job, and registers the new job with |waiter| so that
    its result is waited for as well.

    @param result: Status instance for job.
    @param record: callable that records job status.
             prototype:
               record(base_job.status_log_entry)
    @param waiter: JobResultsWaiter instance.
    @returns: True if a job was scheduled for retry, False otherwise.
    """
    test = self._jobs_to_tests[result.id]
    try:
        # It only takes effect for CQ retriable job:
        #   1) in first try, test.fast=True.
        #   2) in second try, test will be run in normal mode, so reset
        #       test.fast=False.
        test.fast = False
        new_job = self._schedule_test(
                record=record, test=test, retry_for=result.id)
    except (error.RPCException, proxy.JSONRPCException) as e:
        logging.error('Failed to schedule test: %s, Reason: %s',
                      test.name, e)
        return False

    # Scheduling can come back without a job (the result used to be run
    # through bool() for that reason). Only hand a real job to the waiter;
    # there is nothing to wait for otherwise.
    if not new_job:
        return False
    waiter.add_job(new_job)
    return True

Allen Li

2017-07-10 12:53:06 -0700

[diff] [blame]

1251

Jacob Kopczynski

2018-01-10 17:25:38 -0800

[diff] [blame]

1252

@property
def jobs(self):
    """Give a copy of the associated jobs.

    @returns: a new list holding the jobs of this suite.
    """
    return list(self._jobs)

1258

Allen Li

2017-07-10 12:53:06 -0700

[diff] [blame]

1259

Allen Li

1130898

2016-12-29 16:19:55 -0800

[diff] [blame]

1260

@property
def _should_file_bugs(self):
    """Tell whether bugs should be filed for this suite.

    @returns: bool
    """
    # File bug when failure is one of the _FILE_BUG_SUITES,
    # otherwise send an email to the owner and cc.
    suite_tag = self._tag
    return suite_tag in _FILE_BUG_SUITES

1269

1270

Alex Miller

2012-12-19 13:38:31 -0800

[diff] [blame]

1271

def abort(self):
    """
    Abort all scheduled test jobs.

    Does nothing when no jobs have been scheduled; otherwise issues one
    'abort_host_queue_entries' call covering the ids of all jobs.
    """
    if not self._jobs:
        return
    scheduled_ids = [job.id for job in self._jobs]
    self._afe.run('abort_host_queue_entries', job__id__in=scheduled_ids)

Chris Masone

2012-08-15 14:25:53 -0700

[diff] [blame]

1278

1279

Allen Li

4df053e

2016-12-29 16:05:41 -0800

[diff] [blame]

1280

def _remember_job_keyval(self, job):
    """
    Record provided job as a suite job keyval, for later referencing.

    The keyval maps the md5 hex digest of the test name to
    '<job id>-<job owner>'. Nothing is written unless a results
    directory is set and the job has an id, an owner and a test name.

    @param job: some representation of a job that has the attributes:
                id, test_name, and owner
    """
    have_everything = (self._results_dir and job.id and job.owner
                       and job.test_name)
    if not have_everything:
        return

    job_id_owner = '%s-%s' % (job.id, job.owner)
    logging.debug('Adding job keyval for %s=%s',
                  job.test_name, job_id_owner)
    keyval = {hashlib.md5(job.test_name).hexdigest(): job_id_owner}
    utils.write_keyval(self._results_dir, keyval)

1294

Dan Shi

d152180

2013-05-24 13:08:37 -0700

[diff] [blame]

1295

Allen Li

2017-03-09 16:01:35 -0800

[diff] [blame]

1296

class Suite(_BaseSuite):
    """
    A suite of tests, defined by some predicate over control file variables.

    Given a place to search for control files and a predicate to match the
    desired tests, can gather tests and fire off jobs to run them, and then
    wait for results.

    @var _predicate: a function that should return True when run over a
         ControlData representation of a control file that should be in
         this Suite.
    @var _tag: a string with which to tag jobs run in this suite.
    @var _builds: the builds on which we're running this suite.
    @var _afe: an instance of AFE as defined in server/frontend.py.
    @var _tko: an instance of TKO as defined in server/frontend.py.
    @var _jobs: currently scheduled jobs, if any.
    @var _jobs_to_tests: a dictionary that maps job ids to tests represented
                         as ControlData objects.
    @var _cf_getter: a control_file_getter.ControlFileGetter
    @var _job_retry: a bool value indicating whether jobs should be retried
                     on failure.
    @var _retry_handler: a RetryHandler object.
    """

    # TODO(ayatane): These methods are kept on the Suite class for
    # backward compatibility.
    find_and_parse_tests = _deprecated_suite_method(find_and_parse_tests)
    find_possible_tests = _deprecated_suite_method(find_possible_tests)
    create_fs_getter = _deprecated_suite_method(create_fs_getter)
    name_in_tag_predicate = _deprecated_suite_method(name_in_tag_predicate)
    name_in_tag_similarity_predicate = _deprecated_suite_method(
            name_in_tag_similarity_predicate)
    test_name_equals_predicate = _deprecated_suite_method(
            test_name_equals_predicate)
    test_name_matches_pattern_predicate = _deprecated_suite_method(
            test_name_matches_pattern_predicate)
    test_file_matches_pattern_predicate = _deprecated_suite_method(
            test_file_matches_pattern_predicate)
    matches_attribute_expression_predicate = _deprecated_suite_method(
            matches_attribute_expression_predicate)
    test_name_similarity_predicate = _deprecated_suite_method(
            test_name_similarity_predicate)
    test_file_similarity_predicate = _deprecated_suite_method(
            test_file_similarity_predicate)
    list_all_suites = _deprecated_suite_method(list_all_suites)
    get_test_source_build = _deprecated_suite_method(
            suite_common.get_test_source_build)


    @classmethod
    def create_from_predicates(cls, predicates, builds, board, devserver,
                               cf_getter=None, name='ad_hoc_suite',
                               run_prod_code=False, **dargs):
        """
        Create a Suite using the given predicates as test filters.

        Uses supplied predicate(s) to instantiate a Suite. Looks for tests in
        |autotest_dir| and will schedule them using |afe|. Pulls control files
        from the default dev server. Results will be pulled from |tko| upon
        completion.

        @param predicates: A list of callables that accept ControlData
                           representations of control files. A test will be
                           included in suite if all callables in this list
                           return True on the given control file.
        @param builds: the builds on which we're running this suite. It's a
                       dictionary of version_prefix:build.
        @param board: the board on which we're running this suite.
        @param devserver: the devserver which contains the build.
        @param cf_getter: control_file_getter.ControlFileGetter. Defaults to
                          using DevServerGetter.
        @param name: name of suite. Defaults to 'ad_hoc_suite'
        @param run_prod_code: If true, the suite will run the tests that
                              live in prod, aka the test code currently on
                              the lab servers.
        @param **dargs: Any other Suite constructor parameters, as described
                        in Suite.__init__ docstring.
        @return a Suite instance.
        """
        # A caller-supplied getter always wins; otherwise pick the source of
        # control files according to run_prod_code.
        if cf_getter is None and run_prod_code:
            cf_getter = create_fs_getter(_AUTOTEST_DIR)
        elif cf_getter is None:
            source_build = suite_common.get_test_source_build(builds, **dargs)
            cf_getter = _create_ds_getter(source_build, devserver)

        return cls(predicates,
                   name, builds, board, cf_getter, run_prod_code, **dargs)


    @classmethod
    def create_from_name(cls, name, builds, board, devserver, cf_getter=None,
                         **dargs):
        """
        Create a Suite using a predicate based on the SUITE control file var.

        Makes a predicate based on |name| and uses it to instantiate a Suite
        that looks for tests in |autotest_dir| and will schedule them using
        |afe|.  Pulls control files from the default dev server.
        Results will be pulled from |tko| upon completion.

        @param name: a value of the SUITE control file variable to search for.
        @param builds: the builds on which we're running this suite. It's a
                       dictionary of version_prefix:build.
        @param board: the board on which we're running this suite.
        @param devserver: the devserver which contains the build.
        @param cf_getter: control_file_getter.ControlFileGetter. Defaults to
                          using DevServerGetter.
        @param **dargs: Any other Suite constructor parameters, as described
                        in Suite.__init__ docstring.
        @return a Suite instance.
        """
        if cf_getter is None:
            source_build = suite_common.get_test_source_build(builds, **dargs)
            cf_getter = _create_ds_getter(source_build, devserver)

        suite_predicates = [name_in_tag_predicate(name)]
        return cls(suite_predicates,
                   name, builds, board, cf_getter, **dargs)


    def __init__(
            self,
            predicates,
            tag,
            builds,
            board,
            cf_getter,
            run_prod_code=False,
            afe=None,
            tko=None,
            pool=None,
            results_dir=None,
            max_runtime_mins=24*60,
            timeout_mins=24*60,
            file_bugs=False,
            suite_job_id=None,
            ignore_deps=False,
            extra_deps=None,
            priority=priorities.Priority.DEFAULT,
            forgiving_parser=True,
            wait_for_results=True,
            job_retry=False,
            max_retries=sys.maxint,
            offload_failures_only=False,
            test_source_build=None,
            job_keyvals=None,
            test_args=None,
            child_dependencies=(),
            result_reporter=None,
    ):
        """
        Constructor

        Finds the tests matching all of |predicates| via
        find_and_parse_tests() and forwards them, together with the
        remaining arguments, to the _BaseSuite constructor.

        @param predicates: A list of callables that accept ControlData
                           representations of control files. A test will be
                           included in suite if all callables in this list
                           return True on the given control file.
        @param tag: a string with which to tag jobs run in this suite.
        @param builds: the builds on which we're running this suite.
        @param board: the board on which we're running this suite.
        @param cf_getter: a control_file_getter.ControlFileGetter
        @param afe: an instance of AFE as defined in server/frontend.py.
        @param tko: an instance of TKO as defined in server/frontend.py.
        @param pool: Specify the pool of machines to use for scheduling
                purposes.
        @param run_prod_code: If true, the suite will run the test code that
                              lives in prod aka the test code currently on the
                              lab servers.
        @param results_dir: The directory where the job can write results to.
                            This must be set if you want job_id of sub-jobs
                            list in the job keyvals.
        @param max_runtime_mins: Maximum suite runtime, in minutes.
        @param timeout_mins: Maximum job lifetime, in minutes.
        @param file_bugs: Forwarded unchanged to the _BaseSuite constructor.
        @param suite_job_id: Job id that will act as parent id to all sub jobs.
                             Default: None
        @param ignore_deps: True if jobs should ignore the DEPENDENCIES
                            attribute and skip applying of dependency labels.
                            (Default:False)
        @param extra_deps: A list of strings which are the extra DEPENDENCIES
                           to add to each test being scheduled.
        @param priority: Integer priority level.  Higher is more important.
        @param forgiving_parser: Forwarded unchanged to
                                 find_and_parse_tests().
        @param wait_for_results: Set to False to run the suite job without
                                 waiting for test jobs to finish. Default is
                                 True.
        @param job_retry: A bool value indicating whether jobs should be
                          retried on failure. If True, the field
                          'JOB_RETRIES' in control files will be respected.
                          If False, do not retry.
        @param max_retries: Maximum retry limit at suite level.
                            Regardless how many times each individual test
                            has been retried, the total number of retries
                            happening in the suite can't exceed _max_retries.
                            Default to sys.maxint.
        @param offload_failures_only: Only enable gs_offloading for failed
                                      jobs.
        @param test_source_build: Build that contains the server-side test code.
        @param job_keyvals: General job keyvals to be inserted into keyval file,
                            which will be used by tko/parse later.
        @param test_args: A dict of args passed all the way to each individual
                          test that will be actually ran.
        @param child_dependencies: (optional) list of dependency strings
                to be added as dependencies to child jobs.
        @param result_reporter: A _ResultReporter instance to report results. If
                None, an _EmailReporter will be created.
        """
        # A test belongs to the suite only if every predicate accepts it.
        combined_predicate = _ComposedPredicate(predicates)
        matching_tests = find_and_parse_tests(
                cf_getter,
                combined_predicate,
                tag,
                forgiving_parser=forgiving_parser,
                run_prod_code=run_prod_code,
                test_args=test_args,
        )
        super(Suite, self).__init__(
                tests=matching_tests,
                tag=tag,
                builds=builds,
                board=board,
                afe=afe,
                tko=tko,
                pool=pool,
                results_dir=results_dir,
                max_runtime_mins=max_runtime_mins,
                timeout_mins=timeout_mins,
                file_bugs=file_bugs,
                suite_job_id=suite_job_id,
                ignore_deps=ignore_deps,
                extra_deps=extra_deps,
                priority=priority,
                wait_for_results=wait_for_results,
                job_retry=job_retry,
                max_retries=max_retries,
                offload_failures_only=offload_failures_only,
                test_source_build=test_source_build,
                job_keyvals=job_keyvals,
                child_dependencies=child_dependencies,
                result_reporter=result_reporter,
        )

Allen Li

2017-03-09 16:23:06 -0800

[diff] [blame]

1535

Allen Li

2017-03-09 16:01:35 -0800

[diff] [blame]

1536

Allen Li

2017-05-24 16:47:37 -0700

[diff] [blame]

1537

class ProvisionSuite(_BaseSuite):
    """
    A suite for provisioning DUTs.

    This is done by creating dummy_Pass tests.
    """


    def __init__(
            self,
            tag,
            builds,
            board,
            devserver,
            num_required,
            num_max=float('inf'),
            cf_getter=None,
            run_prod_code=False,
            test_args=None,
            test_source_build=None,
            **kwargs):
        """
        Constructor

        Creates one copy of the dummy test per available host matching
        the suite's non-provisioning dependencies, limited to |num_max|.
        No tests are created when the dependencies include 'pool:suites'.

        @param tag: a string with which to tag jobs run in this suite.
        @param builds: the builds on which we're running this suite.
        @param board: the board on which we're running this suite.
        @param devserver: the devserver which contains the build.
        @param num_required: number of tests that must pass.  This is
                             capped by the number of tests that are run.
        @param num_max: max number of tests to make.  By default there
                        is no cap, a test is created for each eligible host.
        @param cf_getter: a control_file_getter.ControlFileGetter.
        @param run_prod_code: forwarded to _load_dummy_test().
        @param test_args: A dict of args passed all the way to each individual
                          test that will be actually ran.
        @param test_source_build: Build that contains the server-side test code.
        @param kwargs: Various keyword arguments passed to
                       _BaseSuite constructor.
        """
        super(ProvisionSuite, self).__init__(
                tests=[],
                tag=tag,
                builds=builds,
                board=board,
                **kwargs)
        self._num_successful = 0
        self._num_required = 0
        self.tests = []

        # Labels that provisioning acts on are left out of the host lookup.
        non_provision_deps = [dep for dep in self._dependencies
                              if not provision.Provision.acts_on(dep)]
        if 'pool:suites' in non_provision_deps:
            logging.info('Provision suite is disabled on suites pool')
            return

        logging.debug('Looking for hosts matching %r', non_provision_deps)
        matching_hosts = self._afe.get_hosts(
                invalid=False, multiple_labels=non_provision_deps)
        logging.debug('Found %d matching hosts for ProvisionSuite',
                      len(matching_hosts))
        usable_hosts = [host for host in matching_hosts
                        if host.is_available()]
        logging.debug('Found %d available hosts for ProvisionSuite',
                      len(usable_hosts))

        dummy_test = _load_dummy_test(
                builds, devserver, cf_getter,
                run_prod_code, test_args, test_source_build)
        test_count = min(len(usable_hosts), num_max)
        self.tests = [dummy_test] * test_count
        logging.debug('Made %d tests for ProvisionSuite', len(self.tests))

        self._num_required = min(num_required, len(self.tests))
        logging.debug('Expecting %d tests to pass for ProvisionSuite',
                      self._num_required)


    def _handle_result(self, result, record, waiter):
        """Handle a result as the base class does and count the good ones.

        @param result: Status instance for job.
        @param record: callable that records job status.
        @param waiter: JobResultsWaiter instance.
        """
        super(ProvisionSuite, self)._handle_result(result, record, waiter)
        if not result.is_good():
            return
        self._num_successful += 1


    def _finished_waiting(self):
        """Tell whether enough tests have passed to stop waiting.

        @returns: True once the successful count reaches the required
                  count.
        """
        return self._num_required <= self._num_successful

Allen Li

2017-05-24 16:47:37 -0700

[diff] [blame]

1614

1615

1616

def _load_dummy_test(
        builds,
        devserver,
        cf_getter=None,
        run_prod_code=False,
        test_args=None,
        test_source_build=None):
    """
    Load and return the dummy pass test.

    When no |cf_getter| is supplied, one is created: from
    create_fs_getter(_AUTOTEST_DIR) if |run_prod_code| is set, otherwise
    from the devserver after staging the 'control_files' artifact of the
    test source build.

    @param builds: the builds on which we're running this suite.
    @param devserver: the devserver which contains the build.
    @param cf_getter: a control_file_getter.ControlFileGetter.
    @param run_prod_code: selects the getter as described above and is
                          passed on to the control file retriever.
    @param test_args: A dict of args passed all the way to each individual
                      test that will be actually ran.
    @param test_source_build: Build that contains the server-side test code.
    @returns: whatever the retriever yields for 'dummy_Pass'.
    """
    if cf_getter is None and run_prod_code:
        cf_getter = create_fs_getter(_AUTOTEST_DIR)
    elif cf_getter is None:
        source_build = suite_common.get_test_source_build(
                builds, test_source_build=test_source_build)
        # The control files must be staged before the getter can serve them.
        devserver.stage_artifacts(image=source_build,
                                  artifacts=['control_files'])
        cf_getter = _create_ds_getter(source_build, devserver)

    control_retriever = _ControlFileRetriever(cf_getter,
                                              run_prod_code=run_prod_code,
                                              test_args=test_args)
    return control_retriever.retrieve_for_test('dummy_Pass')

Allen Li