Blame - server/cros/dynamic_suite.py - platform/external/autotest

2012-02-15 14:21:02 -0800

[diff] [blame^]

1

Chris Masone

2011-10-20 16:36:43 -0700

[diff] [blame]

2

# Use of this source code is governed by a BSD-style license that can be

3

# found in the LICENSE file.

4

5

import common

6

import compiler, logging, os, random, re, time

Chris Masone

2011-12-20 11:06:53 -0800

[diff] [blame]

7

from autotest_lib.client.common_lib import control_data, global_config, error

8

from autotest_lib.client.common_lib import utils

Chris Masone

2012-02-15 14:21:02 -0800

[diff] [blame^]

9

from autotest_lib.server.cros import control_file_getter, frontend_wrappers

Chris Masone

2011-10-20 16:36:43 -0700

[diff] [blame]

10

from autotest_lib.server import frontend

11

12

13

VERSION_PREFIX = 'cros-version-'

Chris Masone

2011-12-20 11:06:53 -0800

[diff] [blame]

14

CONFIG = global_config.global_config

15

16

Chris Masone

8b76425

2012-01-17 11:12:51 -0800

[diff] [blame]

17

def inject_vars(vars, control_file_in):
    """
    Inject the contents of |vars| into |control_file_in|.

    Each entry of |vars| is rendered as a line of the form key='value' and
    the rendered lines are placed ahead of the original control file text.
    Values are interpolated with %s between single quotes; no escaping is
    performed.

    @param vars: a dict to shoehorn into the provided control file string.
    @param control_file_in: the contents of a control file to munge.
    @return the modified control file string.
    """
    # items() instead of the Python 2-only iteritems() keeps this helper
    # usable on any interpreter; join() avoids repeated string concatenation.
    assignments = ["%s='%s'\n" % (key, value) for key, value in vars.items()]
    return ''.join(assignments) + control_file_in

29

30

Chris Masone

2011-12-20 11:06:53 -0800

[diff] [blame]

31

def _image_url_pattern():
    """
    Look up the image URL pattern in the global config.

    @return the 'image_url_pattern' value of the CROS section, as a str.
    """
    section, key = 'CROS', 'image_url_pattern'
    return CONFIG.get_config_value(section, key, type=str)

33

34

35

def _package_url_pattern():
    """
    Look up the package URL pattern in the global config.

    @return the 'package_url_pattern' value of the CROS section, as a str.
    """
    section, key = 'CROS', 'package_url_pattern'
    return CONFIG.get_config_value(section, key, type=str)

37

Chris Masone

2011-10-20 16:36:43 -0700

[diff] [blame]

38

39

class Reimager(object):
    """
    A class that can run jobs to reimage devices.

    @var _afe: a frontend.AFE instance used to talk to autotest.
    @var _tko: a frontend.TKO instance used to query the autotest results db.
    @var _cf_getter: a ControlFileGetter used to get the AU control file.
    """


    def __init__(self, autotest_dir, afe=None, tko=None):
        """
        Constructor

        @param autotest_dir: the place to find autotests.
        @param afe: an instance of AFE as defined in server/frontend.py.
                    If omitted, a frontend_wrappers.RetryingAFE is created.
        @param tko: an instance of TKO as defined in server/frontend.py.
                    If omitted, a frontend_wrappers.RetryingTKO is created.
        """
        self._afe = afe or frontend_wrappers.RetryingAFE(timeout_min=30,
                                                         delay_sec=10,
                                                         debug=False)
        self._tko = tko or frontend_wrappers.RetryingTKO(timeout_min=30,
                                                         delay_sec=10,
                                                         debug=False)
        self._cf_getter = control_file_getter.FileSystemGetter(
            [os.path.join(autotest_dir, 'server/site_tests')])


    def skip(self, g):
        """
        Check whether |g| asks for reimaging to be skipped.

        @param g: a dict-like container that may hold a 'SKIP_IMAGE' entry.
        @return the value stored under 'SKIP_IMAGE' if present, else False.
        """
        return 'SKIP_IMAGE' in g and g['SKIP_IMAGE']


    def attempt(self, build, board, record, num=None):
        """
        Synchronously attempt to reimage some machines.

        Fire off attempts to reimage |num| machines of type |board|, using an
        image called |build|.  Wait for completion, polling every 10s, and
        log results with |record| upon completion.

        @param build: the build to install e.g.
                      x86-alex-release/R18-1655.0.0-a1-b1584.
        @param board: which kind of devices to reimage.
        @param record: callable that records job status.
                       prototype:
                         record(status, subdir, name, reason)
        @param num: how many devices to reimage.  When falsy, the
                    'sharding_factor' value of the CROS config section is
                    used instead.
        @return True if all reimaging jobs succeed, False otherwise.
        """
        if not num:
            num = CONFIG.get_config_value('CROS', 'sharding_factor', type=int)
        logging.debug("scheduling reimaging across %d machines", num)
        wrapper_job_name = 'try new image'
        record('START', None, wrapper_job_name)
        self._ensure_version_label(VERSION_PREFIX + build)
        canary = self._schedule_reimage_job(build, num, board)
        logging.debug('Created re-imaging job: %d', canary.id)
        # First wait for the job to leave the queue, then for it to finish.
        while self._afe.get_jobs(id=canary.id, not_yet_run=True):
            time.sleep(10)
        logging.debug('Re-imaging job running.')
        while not self._afe.get_jobs(id=canary.id, finished=True):
            time.sleep(10)
        logging.debug('Re-imaging job finished.')
        canary.result = self._afe.poll_job_results(self._tko, canary, 0)

        if canary.result is True:
            self._report_results(canary, record)
            record('END GOOD', None, wrapper_job_name)
            return True

        if canary.result is None:
            record('FAIL', None, canary.name, 're-imaging tasks did not run')
        else:  # canary.result is False
            self._report_results(canary, record)

        record('END FAIL', None, wrapper_job_name)
        return False


    def _ensure_version_label(self, name):
        """
        Ensure that a label called |name| exists in the autotest DB.

        @param name: the label to check for/create.
        """
        if not self._afe.get_labels(name=name):
            self._afe.create_label(name=name)


    def _schedule_reimage_job(self, build, num_machines, board):
        """
        Schedules the reimaging of |num_machines| |board| devices with |build|.

        Sends an RPC to the autotest frontend to enqueue reimaging jobs on
        |num_machines| devices of type |board|

        @param build: the build to install (must be unique).
        @param num_machines: how many devices to reimage.
        @param board: which kind of devices to reimage.
        @return a frontend.Job object for the reimaging job we scheduled.
        """
        control_file = inject_vars(
            {'image_url': _image_url_pattern() % build, 'image_name': build},
            self._cf_getter.get_control_file_contents_by_name('autoupdate'))

        return self._afe.create_job(control_file=control_file,
                                    name=build + '-try',
                                    control_type='Server',
                                    meta_hosts=[board] * num_machines)


    def _report_results(self, job, record):
        """
        Record results from a completed frontend.Job object.

        A successful job is recorded once under its own name.  Otherwise one
        entry is recorded per host (or per failed test on a host), driven by
        |job.results_platform_map| and |job.test_status|.

        @param job: a completed frontend.Job object populated by
               frontend.AFE.poll_job_results.
        @param record: callable that records job status.
               prototype:
                 record(status, subdir, name, reason)
        """
        if job.result == True:
            record('GOOD', None, job.name)
            return

        for platform in job.results_platform_map:
            for status in job.results_platform_map[platform]:
                # 'Total' is skipped; only per-status buckets are reported.
                if status == 'Total':
                    continue
                for host in job.results_platform_map[platform][status]:
                    if host not in job.test_status:
                        record('ERROR', None, host, 'Job failed to run.')
                    elif status == 'Failed':
                        for test_status in job.test_status[host].fail:
                            record('FAIL', None, host, test_status.reason)
                    elif status == 'Aborted':
                        for test_status in job.test_status[host].fail:
                            record('ABORT', None, host, test_status.reason)
                    elif status == 'Completed':
                        record('GOOD', None, host)

class Suite(object):

"""

A suite of tests, defined by some predicate over control file variables.

185

186

Given a place to search for control files and a predicate to match the desired

187

tests, can gather tests and fire off jobs to run them, and then wait for

188

results.

189

190

@var _predicate: a function that should return True when run over a

191

ControlData representation of a control file that should be in

192

this Suite.

193

@var _tag: a string with which to tag jobs run in this suite.

194

@var _afe: an instance of AFE as defined in server/frontend.py.

195

@var _tko: an instance of TKO as defined in server/frontend.py.

196

@var _jobs: currently scheduled jobs, if any.

197

@var _cf_getter: a control_file_getter.ControlFileGetter

"""

Chris Masone

2012-01-17 11:16:32 -0800

[diff] [blame]

201

@staticmethod
def create_fs_getter(autotest_dir):
    """
    Build a control file getter rooted at |autotest_dir|.

    @param autotest_dir: the place to find autotests.
    @return a FileSystemGetter instance that looks under |autotest_dir|.
    """
    # The places to look for tests are currently hard-coded.
    search_dirs = []
    for subpath in ('server/site_tests', 'client/site_tests'):
        search_dirs.append(os.path.join(autotest_dir, subpath))
    return control_file_getter.FileSystemGetter(search_dirs)

@staticmethod
def create_from_name(name, autotest_dir, afe=None, tko=None):
    """
    Create a Suite whose members are selected by the SUITE control file var.

    Builds a predicate that matches control files whose |suite| attribute
    equals |name|, and uses it to instantiate a Suite that looks for tests
    in |autotest_dir| and will schedule them using |afe|.  Results will be
    pulled from |tko| upon completion.  |name| also serves as the Suite tag.

    @param name: a value of the SUITE control file variable to search for.
    @param autotest_dir: the place to find autotests.
    @param afe: an instance of AFE as defined in server/frontend.py.
    @param tko: an instance of TKO as defined in server/frontend.py.
    @return a Suite instance.
    """
    def in_named_suite(test):
        # Control files lacking a |suite| attribute never match.
        return hasattr(test, 'suite') and test.suite == name

    return Suite(in_named_suite, name, autotest_dir, afe, tko)

230

231

232

def __init__(self, predicate, tag, autotest_dir, afe=None, tko=None):
    """
    Constructor

    Stores the selection criteria, sets up the frontends, and immediately
    gathers the matching tests (experimental ones included) from
    |autotest_dir|.

    @param predicate: a function that should return True when run over a
           ControlData representation of a control file that should be in
           this Suite.
    @param tag: a string with which to tag jobs run in this suite.
    @param autotest_dir: the place to find autotests.
    @param afe: an instance of AFE as defined in server/frontend.py.
    @param tko: an instance of TKO as defined in server/frontend.py.
    """
    self._predicate = predicate
    self._tag = tag

    # Build the default frontends only when the caller supplied none.
    if not afe:
        afe = frontend_wrappers.RetryingAFE(timeout_min=30,
                                            delay_sec=10,
                                            debug=False)
    self._afe = afe
    if not tko:
        tko = frontend_wrappers.RetryingTKO(timeout_min=30,
                                            delay_sec=10,
                                            debug=False)
    self._tko = tko

    self._jobs = []
    self._cf_getter = Suite.create_fs_getter(autotest_dir)
    self._tests = Suite.find_and_parse_tests(self._cf_getter,
                                             self._predicate,
                                             add_experimental=True)

@property

def tests(self):

"""

A list of ControlData objects in the suite, with added |text| attr.

"""

return self._tests

def stable_tests(self):

270

"""

271

|self.tests|, filtered for non-experimental tests.

272

"""

273

return filter(lambda t: not t.experimental, self.tests)

274

275

276

def unstable_tests(self):

277

"""

278

|self.tests|, filtered for experimental tests.

279

"""

280

return filter(lambda t: t.experimental, self.tests)

281

282

283

def _create_job(self, test, image_name):
    """
    Thin wrapper around frontend.AFE.create_job().

    The job is named <image_name>/<suite tag>/<test name> and is scheduled
    against the meta-host label formed by VERSION_PREFIX + |image_name|.

    @param test: ControlData object for a test to run.
    @param image_name: the name of an image against which to test.
    @return frontend.Job object for the job just scheduled.
    """
    job_name = '/'.join([image_name, self._tag, test.name])
    version_label = VERSION_PREFIX + image_name
    return self._afe.create_job(control_file=test.text,
                                name=job_name,
                                control_type=test.test_type.capitalize(),
                                meta_hosts=[version_label])

296

297

298

def run_and_wait(self, image_name, record, add_experimental=True):

299

"""

300

Synchronously run tests in |self.tests|.

301

302

Schedules tests against a device running image |image_name|, and

303

then polls for status, using |record| to print status when each

304

completes.

305

306

Tests returned by self.stable_tests() will always be run, while tests

307

in self.unstable_tests() will only be run if |add_experimental| is true.

308

309

@param image_name: the name of an image against which to test.

310

@param record: callable that records job status.

311

prototype:

312

record(status, subdir, name, reason)

313

@param add_experimental: schedule experimental tests as well, or not.

314

"""

315

try:

Scott Zawalski

2012-02-10 18:29:12 -0500

[diff] [blame]

316

record('INFO', None, 'Start %s' % self._tag)

Chris Masone

2011-10-20 16:36:43 -0700

[diff] [blame]

317

self.schedule(image_name, add_experimental)

318

try:

319

for result in self.wait_for_results():

Scott Zawalski

2012-02-10 18:29:12 -0500

[diff] [blame]

320

# |result| will be a tuple of a maximum of 4 entries and a

321

# minimum of 3. We use the first 3 for START and END

322

# entries so we separate those variables out for legible

323

# variable names, nothing more.

324

status = result[0]

325

test_name = result[2]

326

record('START', None, test_name)

Chris Masone

2011-10-20 16:36:43 -0700

[diff] [blame]

327

record(*result)

Scott Zawalski

2012-02-10 18:29:12 -0500

[diff] [blame]

328

record('END %s' % status, None, test_name)

Chris Masone

2011-10-20 16:36:43 -0700

[diff] [blame]

329

except Exception as e:

330

logging.error(e)

Scott Zawalski

2012-02-10 18:29:12 -0500

[diff] [blame]

331

record('FAIL', None, self._tag,

332

'Exception waiting for results')

Chris Masone

2011-10-20 16:36:43 -0700

[diff] [blame]

333

except Exception as e:

334

logging.error(e)

Scott Zawalski

2012-02-10 18:29:12 -0500

[diff] [blame]

335

record('FAIL', None, self._tag,

336

'Exception while scheduling suite')

Chris Masone

2011-10-20 16:36:43 -0700

[diff] [blame]

337

338

339

def schedule(self, image_name, add_experimental=True):

340

"""

341

Schedule jobs using |self._afe|.

342

343

frontend.Job objects representing each scheduled job will be put in

344

|self._jobs|.

345

346

@param image_name: the name of an image against which to test.

347

@param add_experimental: schedule experimental tests as well, or not.

348

"""

349

for test in self.stable_tests():

350

logging.debug('Scheduling %s', test.name)

351

self._jobs.append(self._create_job(test, image_name))

352

353

if add_experimental:

354

# TODO(cmasone): ensure I can log results from these differently.

355

for test in self.unstable_tests():

356

logging.debug('Scheduling %s', test.name)

357

self._jobs.append(self._create_job(test, image_name))

358

359

360

def _status_is_relevant(self, status):

361

"""

362

Indicates whether the status of a given test is meaningful or not.

363

364

@param status: frontend.TestStatus object to look at.

365

@return True if this is a test result worth looking at further.

366

"""

367

return not (status.test_name.startswith('SERVER_JOB') or

368

status.test_name.startswith('CLIENT_JOB'))

369

370

371

def _collate_aborted(self, current_value, entry):

372

"""

373

reduce() over a list of HostQueueEntries for a job; True if any aborted.

374

375

Functor that can be reduced()ed over a list of

376

HostQueueEntries for a job. If any were aborted

377

(|entry.aborted| exists and is True), then the reduce() will

return True.

Ex:

entries = self._afe.run('get_host_queue_entries', job=job.id)

382

reduce(self._collate_aborted, entries, False)

383

384

@param current_value: the current accumulator (a boolean).

385

@param entry: the current entry under consideration.

386

@return the value of |entry.aborted| if it exists, False if not.

387

"""

388

return current_value or ('aborted' in entry and entry['aborted'])

389

390

391

def wait_for_results(self):

392

"""

393

Wait for results of all tests in all jobs in |self._jobs|.

394

395

Currently polls for results every 5s. When all results are available,

396

@return a list of tuples, one per test: (status, subdir, name, reason)

397

"""

Chris Masone

2011-10-20 16:36:43 -0700

[diff] [blame]

398

while self._jobs:

399

for job in list(self._jobs):

400

if not self._afe.get_jobs(id=job.id, finished=True):

401

continue

402

403

self._jobs.remove(job)

404

405

entries = self._afe.run('get_host_queue_entries', job=job.id)

406

if reduce(self._collate_aborted, entries, False):

Scott Zawalski

2012-02-10 18:29:12 -0500

[diff] [blame]

407

yield('ABORT', None, job.name)

Chris Masone

2011-10-20 16:36:43 -0700

[diff] [blame]

408

else:

409

statuses = self._tko.get_status_counts(job=job.id)

410

for s in filter(self._status_is_relevant, statuses):

Scott Zawalski

2012-02-10 18:29:12 -0500

[diff] [blame]

411

yield(s.status, None, s.test_name, s.reason)

Chris Masone

2011-10-20 16:36:43 -0700

[diff] [blame]

412

time.sleep(5)

413

Chris Masone

2011-10-20 16:36:43 -0700

[diff] [blame]

414

Chris Masone

fef2138

2012-01-17 11:16:32 -0800

[diff] [blame]

415

@staticmethod

416

def find_and_parse_tests(cf_getter, predicate, add_experimental=False):

Chris Masone