Blame - site_utils/test_push.py - platform/external/autotest

2013-07-25 15:08:48 -0700

[diff] [blame]

#!/usr/bin/python

#

# Use of this source code is governed by a BSD-style license that can be

5

# found in the LICENSE file.

6

7

"""Tool to validate code in prod branch before pushing to lab.


The script runs push_to_prod suite to verify code in prod branch is ready to be
pushed. Link to design document:
https://docs.google.com/a/google.com/document/d/1JMz0xS3fZRSHMpFkkKAL_rxsdbNZomhHbC3B8L71uuI/edit

To verify if prod branch can be pushed to lab, run the following command on
the chromeos-staging-master2.hot server:
/usr/local/autotest/site_utils/test_push.py -e someone@company.com

The script uses the latest gandof stable build as test build by default.


"""

import argparse

Shuqian Zhao

1f311c0

2016-09-01 19:30:54 -0700

[diff] [blame]

22

import ast

Shuqian Zhao

2016-10-25 13:31:06 -0700

[diff] [blame]

23

from contextlib import contextmanager

Dan Shi

2013-07-25 15:08:48 -0700

[diff] [blame]

24

import getpass

Dan Shi

2015-04-07 17:37:09 -0700

[diff] [blame]

25

import multiprocessing

Dan Shi

2013-07-25 15:08:48 -0700

[diff] [blame]

import os

import re

import subprocess

import sys

Dan Shi

2015-04-07 17:37:09 -0700

[diff] [blame]

30

import time

31

import traceback

Dan Shi

2013-07-25 15:08:48 -0700

[diff] [blame]

32

import urllib2

33

34

import common

Dan Shi

2014-05-09 15:18:15 -0700

[diff] [blame]

35

try:

36

from autotest_lib.frontend import setup_django_environment

37

from autotest_lib.frontend.afe import models

Shuqian Zhao

2016-09-12 10:42:03 -0700

[diff] [blame]

38

from autotest_lib.frontend.afe import rpc_utils

Dan Shi

2014-05-09 15:18:15 -0700

[diff] [blame]

39

except ImportError:

40

# Unittest may not have Django database configured and will fail to import.

41

pass

Dan Shi

5fa602c

2015-03-26 17:54:13 -0700

[diff] [blame]

42

from autotest_lib.client.common_lib import global_config

Shuqian Zhao

2016-09-12 10:42:03 -0700

[diff] [blame]

43

from autotest_lib.client.common_lib import priorities

Shuqian Zhao

f239b31

2017-12-05 16:45:02 -0800

[diff] [blame]

44

from autotest_lib.client.common_lib.cros import retry

Xixuan Wu

93e646c

2017-12-07 18:36:10 -0800

[diff] [blame]

45

from autotest_lib.server import constants

Dan Shi

2013-07-25 15:08:48 -0700

[diff] [blame]

46

from autotest_lib.server import site_utils

Shuqian Zhao

2016-09-12 10:42:03 -0700

[diff] [blame]

47

from autotest_lib.server import utils

Dan Shi

2014-12-22 16:25:05 -0800

[diff] [blame]

48

from autotest_lib.server.cros import provision

Dan Shi

2013-07-25 15:08:48 -0700

[diff] [blame]

49

from autotest_lib.server.cros.dynamic_suite import frontend_wrappers

Dan Shi

2013-07-25 15:08:48 -0700

[diff] [blame]

50

Shuqian Zhao

2017-05-30 12:56:57 -0700

[diff] [blame]

51

try:

52

from chromite.lib import metrics

53

from chromite.lib import ts_mon_config

54

except ImportError:

55

metrics = site_utils.metrics_mock

56

ts_mon_config = site_utils.metrics_mock

57

Shuqian Zhao

2016-10-25 13:31:06 -0700

[diff] [blame]

58

# Root of the autotest checkout, as reported by the `common` module.
AUTOTEST_DIR = common.autotest_dir
CONFIG = global_config.global_config

# RPC clients used throughout this module.
AFE = frontend_wrappers.RetryingAFE(timeout_min=0.5, delay_sec=2)
TKO = frontend_wrappers.RetryingTKO(timeout_min=0.1, delay_sec=10)

MAIL_FROM = 'chromeos-test@google.com'
# Matches a build version such as R36-5881.0.0.
BUILD_REGEX = r'R[\d]+-[\d]+\.[\d]+\.[\d]+'
RUN_SUITE_COMMAND = 'run_suite.py'
PUSH_TO_PROD_SUITE = 'push_to_prod'
DUMMY_SUITE = 'dummy'
TESTBED_SUITE = 'testbed_push'
# TODO(shuqianz): Dynamically get android build after crbug.com/646068 fixed
DEFAULT_TIMEOUT_MIN_FOR_SUITE_JOB = 30
IMAGE_BUCKET = CONFIG.get_config_value('CROS', 'image_storage_server')
# TODO(crbug.com/767302): Bump up testbed requirement back to 1 when we
# re-enable testbed tests.
DEFAULT_NUM_DUTS = (
    ('gandof', 4),
    ('quawks', 2),
    ('testbed', 0),
)

# Matches the run_suite output line that announces the suite job; group 1 is
# the suite job id.
SUITE_JOB_START_INFO_REGEX = ('^.*Created suite job:.*'
                              r'tab_id=view_job&object_id=(\d+)$')

# Dictionary of test results keyed by test name regular expression.
EXPECTED_TEST_RESULTS = {
    '^SERVER_JOB$': 'GOOD',
    # This is related to dummy_Fail/control.dependency.
    'dummy_Fail.dependency$': 'TEST_NA',
    'login_LoginSuccess.*': 'GOOD',
    'provision_AutoUpdate.double': 'GOOD',
    'dummy_Pass.*': 'GOOD',
    'dummy_Fail.Fail$': 'FAIL',
    'dummy_Fail.RetryFail$': 'FAIL',
    'dummy_Fail.RetrySuccess': 'GOOD',
    'dummy_Fail.Error$': 'ERROR',
    'dummy_Fail.Warn$': 'WARN',
    'dummy_Fail.NAError$': 'TEST_NA',
    'dummy_Fail.Crash$': 'GOOD',
    'autotest_SyncCount$': 'GOOD',
}

EXPECTED_TEST_RESULTS_DUMMY = {
    '^SERVER_JOB$': 'GOOD',
    'dummy_Pass.*': 'GOOD',
    'dummy_Fail.Fail': 'FAIL',
    'dummy_Fail.Warn': 'WARN',
    'dummy_Fail.Crash': 'GOOD',
    'dummy_Fail.Error': 'ERROR',
    'dummy_Fail.NAError': 'TEST_NA',
}

EXPECTED_TEST_RESULTS_TESTBED = {
    '^SERVER_JOB$': 'GOOD',
    'testbed_DummyTest': 'GOOD',
}

EXPECTED_TEST_RESULTS_POWERWASH = {
    'platform_Powerwash': 'GOOD',
    'SERVER_JOB': 'GOOD',
}

URL_HOST = CONFIG.get_config_value('SERVER', 'hostname', type=str)
URL_PATTERN = CONFIG.get_config_value('CROS', 'log_url_pattern', type=str)

# Some test could be missing from the test results for various reasons. Add
# such test in this list and explain the reason.
IGNORE_MISSING_TESTS = [
    # For latest build, npo_test_delta does not exist.
    'autoupdate_EndToEndTest.npo_test_delta.*',
    # For trybot build, nmo_test_delta does not exist.
    'autoupdate_EndToEndTest.nmo_test_delta.*',
    # Older build does not have login_LoginSuccess test in push_to_prod suite.
    # TODO(dshi): Remove following lines after R41 is stable.
    'login_LoginSuccess',
]

# Multiprocessing proxy objects that are used to share data between background
# suite-running processes and main process. The multiprocessing-compatible
# versions are initialized in _main.
_run_suite_output = []
_all_suite_ids = []

# A dict maps the name of the updated repos and the path of them.
UPDATED_REPOS = {'autotest': AUTOTEST_DIR,
                 'chromite': '%s/site-packages/chromite/' % AUTOTEST_DIR}
PUSH_USER = 'chromeos-test-lab'

Dan Shi

2013-07-25 15:08:48 -0700

[diff] [blame]

139

140

class TestPushException(Exception):
    """Signals that validating the prod branch (the push test) failed."""

143

Shuqian Zhao

f239b31

2017-12-05 16:45:02 -0800

[diff] [blame]

144

@retry.retry(TestPushException, timeout_min=5, delay_sec=30)
def check_dut_inventory(required_num_duts, pool):
    """Make sure the pool holds enough Ready DUTs for every required platform.

    Only unlocked hosts in Ready status that carry the pool label are counted.

    @param required_num_duts: a dict mapping platform name to the number of
                              DUTs that platform needs for the push tests.
    @param pool: the pool used by test_push.
    @raise TestPushException: if any platform has fewer DUTs than required.
    """
    print('Checking DUT inventory...')
    pool_label = constants.Labels.POOL_PREFIX + pool
    ready_hosts = AFE.run('get_hosts', status='Ready', locked=False)

    # Tally the pool's hosts per platform.
    current_inventory = {}
    for host in ready_hosts:
        if pool_label not in host.get('labels', []):
            continue
        host_platform = host['platform']
        current_inventory[host_platform] = (
                current_inventory.get(host_platform, 0) + 1)

    # Collect one message per platform that falls short.
    shortages = []
    for platform, req_num in required_num_duts.items():
        curr_num = current_inventory.get(platform, 0)
        if curr_num < req_num:
            shortages.append(
                    '\nRequire %d %s DUTs in pool: %s, only %d are Ready'
                    ' now' % (req_num, platform, pool, curr_num))

    error_msg = ''.join(shortages)
    if error_msg:
        raise TestPushException('Not enough DUTs to run push tests. %s' %
                                error_msg)

168

Dan Shi

2014-05-09 13:47:00 -0700

[diff] [blame]

169

Shuqian Zhao

2016-09-12 10:42:03 -0700

[diff] [blame]

170

def powerwash_dut_to_test_repair(hostname, timeout):
    """Run platform_Powerwash on a DUT, then reverify it to exercise repair.

    A 'powerwash' server job is created for the host and polled every 10
    seconds until TKO reports test statuses for it. The results are then
    checked against EXPECTED_TEST_RESULTS_POWERWASH and a reverify is
    scheduled for the host.

    @param hostname: hostname of the dut.
    @param timeout: seconds to wait for the powerwash results before the job
                    is aborted.
    @raise TestPushException: if no results show up within the timeout, or
                              the results do not match the expected ones.
    """
    powerwash_test = models.Test.objects.get(name='platform_Powerwash')
    control_path = os.path.join(common.autotest_dir, powerwash_test.path)
    control_file = utils.read_file(control_path)
    job_id = rpc_utils.create_job_common(
            'powerwash', priority=priorities.Priority.SUPER,
            control_type='Server', control_file=control_file,
            hosts=[hostname])

    deadline = time.time() + timeout
    while True:
        if TKO.get_job_test_statuses_from_db(job_id):
            break
        if time.time() >= deadline:
            AFE.run('abort_host_queue_entries', job=job_id)
            raise TestPushException(
                    'Powerwash test on %s timeout after %ds, abort it.' %
                    (hostname, timeout))
        time.sleep(10)

    verify_test_results(job_id, EXPECTED_TEST_RESULTS_POWERWASH)
    # Kick off verify, verify will fail and a repair should be triggered.
    AFE.reverify_hosts(hostnames=[hostname])

Kevin Cheng

2015-12-11 09:45:57 -0800

[diff] [blame]

194

195

Shuqian Zhao

06deae0

2017-02-28 09:55:59 -0800

[diff] [blame]

196

def reverify_all_push_duts():
    """Schedule a reverify for every host the AFE returns."""
    print('Reverifying all DUTs.')
    hostnames = []
    for host in AFE.get_hosts():
        hostnames.append(host.hostname)
    AFE.reverify_hosts(hostnames=hostnames)

201

202

Shuqian Zhao

bb030ff

2017-09-21 17:36:13 -0700

[diff] [blame]

203

def get_default_build(board='gandof', server='chromeos-staging-master2.hot'):
    """Get the default build to be used for test.

    The stable version is looked up by running `atest stable_version list`
    against the given server. If that command fails, or its output contains no
    build matching BUILD_REGEX, the 'stable_cros_version' value from the CROS
    section of the global config is used instead.

    @param board: Name of board to be tested, default is gandof.
    @param server: Web server passed to atest (-w) to grab the stable version
                   from.
    @return: Build to be tested, e.g., gandof-release/R36-5881.0.0
    """
    # Pass an argument list and skip the shell, so board/server values coming
    # from the command line can never be interpreted as shell syntax.
    cmd = ['%s/cli/atest' % AUTOTEST_DIR, 'stable_version', 'list',
           '--board=%s' % board, '-w', server]
    try:
        result = subprocess.check_output(cmd).strip()
    except (subprocess.CalledProcessError, OSError) as e:
        # A failed lookup must not abort the run; fall through to the config
        # value below.
        print('Failed to get stable version of %s from %s: %s' %
              (board, server, e))
        result = ''

    match = re.search(BUILD_REGEX, result)
    if match:
        return '%s-release/%s' % (board, match.group(0))

    # If fail to get stable version from cautotest, use that defined in config
    build = CONFIG.get_config_value('CROS', 'stable_cros_version')
    return '%s-release/%s' % (board, build)

220

Dan Shi

2013-07-25 15:08:48 -0700

[diff] [blame]

221

def parse_arguments():
    """Parse arguments for test_push tool.

    When --build or --shard_build is not given, it is filled in with the
    result of get_default_build() for the corresponding board, queried from
    the server given by --web.

    @return: Parsed arguments.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-b', '--board', dest='board', default='gandof',
                        help='Default is gandof.')
    parser.add_argument('-sb', '--shard_board', dest='shard_board',
                        default='quawks',
                        help='Default is quawks.')
    parser.add_argument('-i', '--build', dest='build', default=None,
                        help='Default is the latest stable build of given '
                             'board. Must be a stable build, otherwise AU test '
                             'will fail. (ex: gandof-release/R54-8743.25.0)')
    parser.add_argument('-si', '--shard_build', dest='shard_build',
                        default=None,
                        help='Default is the latest stable build of given '
                             'board. Must be a stable build, otherwise AU test '
                             'will fail.')
    parser.add_argument('-w', '--web', default='chromeos-staging-master2.hot',
                        help='Specify web server to grab stable version from.')
    parser.add_argument('-ab', '--android_board', dest='android_board',
                        default='shamu-2', help='Android board to test.')
    parser.add_argument('-ai', '--android_build', dest='android_build',
                        help='Android build to test.')
    parser.add_argument('-p', '--pool', dest='pool', default='bvt')
    parser.add_argument('-t', '--timeout_min', dest='timeout_min', type=int,
                        default=DEFAULT_TIMEOUT_MIN_FOR_SUITE_JOB,
                        help='Time in mins to wait before abort the jobs we '
                             'are waiting on. Only for the asynchronous suites '
                             'triggered by create_and_return flag.')
    # The value is parsed with ast.literal_eval, so it must be a Python
    # literal; the default is a dict built from DEFAULT_NUM_DUTS.
    parser.add_argument('-ud', '--num_duts', dest='num_duts',
                        default=dict(DEFAULT_NUM_DUTS),
                        type=ast.literal_eval,
                        help="Python dict literal that specifies the required"
                        " number of DUTs for each board. E.g {'gandof':4}")
    parser.add_argument('-c', '--continue_on_failure', action='store_true',
                        dest='continue_on_failure',
                        help='All tests continue to run when there is failure')

    arguments = parser.parse_args(sys.argv[1:])

    # Get latest stable build as default build.
    if not arguments.build:
        arguments.build = get_default_build(arguments.board, arguments.web)
    if not arguments.shard_build:
        arguments.shard_build = get_default_build(arguments.shard_board,
                                                  arguments.web)

    return arguments

Shuqian Zhao

2015-08-06 09:46:22 -0700

[diff] [blame]

274

def do_run_suite(suite_name, arguments, use_shard=False,
                 create_and_return=False, testbed_test=False):
    """Launch a suite through run_suite and hand back the suite job id.

    The run_suite output is echoed to stdout line by line and recorded in
    _run_suite_output; the suite job id parsed from it is recorded in
    _all_suite_ids. When create_and_return is set, the suite job is polled
    until the AFE reports it finished or arguments.timeout_min elapses.

    @param suite_name: Name of a suite, e.g., dummy.
    @param arguments: Arguments for run_suite command.
    @param use_shard: If true, suite is scheduled for shard board.
    @param create_and_return: If True, run_suite just creates the suite, print
                              the job id, then finish immediately.
    @param testbed_test: True to run testbed test. Default is False.

    @return: Suite job ID.
    @raise TestPushException: if no suite job id can be found in the run_suite
                              output, or an asynchronous suite times out.
    """
    # testbed_test takes precedence over use_shard when picking board/build.
    if testbed_test:
        board, build = arguments.android_board, arguments.android_build
    elif use_shard:
        board, build = arguments.shard_board, arguments.shard_build
    else:
        board, build = arguments.board, arguments.build

    # Remove cros-version label to force provision.
    hosts = AFE.get_hosts(label=constants.Labels.BOARD_PREFIX + board,
                          locked=False)
    for host in hosts:
        labels_to_remove = [
                label for label in host.labels
                if label.startswith((provision.CROS_VERSION_PREFIX,
                                     provision.TESTBED_BUILD_VERSION_PREFIX))]
        if labels_to_remove:
            AFE.run('host_remove_labels', id=host.id, labels=labels_to_remove)

        # Test repair work flow on shards, powerwash test will timeout after 7m.
        if use_shard and not create_and_return:
            powerwash_dut_to_test_repair(host.hostname, timeout=420)

    # run_suite.py lives next to this script.
    script_dir = os.path.dirname(os.path.realpath(__file__))
    cmd = [os.path.join(script_dir, RUN_SUITE_COMMAND),
           '-s', suite_name,
           '-b', board,
           '-i', build,
           '-p', arguments.pool,
           '--minimum_duts', str(arguments.num_duts[board])]
    if create_and_return:
        cmd.append('-c')
    if testbed_test:
        cmd.append('--run_prod_code')

    suite_job_id = None

    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                            stderr=subprocess.STDOUT)

    while True:
        line = proc.stdout.readline()

        # Break when run_suite process completed.
        if not line and proc.poll() is not None:
            break
        stripped = line.rstrip()
        print(stripped)
        _run_suite_output.append(stripped)

        # Only the first line announcing the suite job is of interest.
        if not suite_job_id:
            m = re.match(SUITE_JOB_START_INFO_REGEX, line)
            if m and m.group(1):
                suite_job_id = int(m.group(1))
                _all_suite_ids.append(suite_job_id)

    if not suite_job_id:
        raise TestPushException('Failed to retrieve suite job ID.')

    # If create_and_return specified, wait for the suite to finish.
    if create_and_return:
        end = time.time() + arguments.timeout_min * 60
        while not AFE.get_jobs(id=suite_job_id, finished=True):
            if time.time() >= end:
                AFE.run('abort_host_queue_entries', job=suite_job_id)
                raise TestPushException(
                        'Asynchronous suite triggered by create_and_return '
                        'flag has timed out after %d mins. Aborting it.' %
                        arguments.timeout_min)
            time.sleep(10)

    print('Suite job %s is completed.' % suite_job_id)
    return suite_job_id

Dan Shi

2014-05-09 15:18:15 -0700

[diff] [blame]

368

def check_dut_image(build, suite_job_id):
    """Check that every DUT used by the suite carries the expected build.

    The hosts are taken from the first host queue entry of each child job of
    the suite job.

    @param build: Expected build to be imaged.
    @param suite_job_id: job ID of the suite job.
    @raise TestPushException: If a DUT does not have expected build imaged.
    """
    print('Checking image installed in DUTs...')
    child_job_ids = []
    for job in models.Job.objects.filter(parent_job_id=suite_job_id):
        child_job_ids.append(job.id)

    first_hqes = []
    for job_id in child_job_ids:
        first_hqes.append(
                models.HostQueueEntry.objects.filter(job_id=job_id)[0])

    # Several jobs may have run on the same host; check each host once.
    hostnames = set([hqe.host.hostname for hqe in first_hqes])
    for hostname in hostnames:
        found_build = site_utils.get_build_from_afe(hostname, AFE)
        if found_build == build:
            continue
        raise TestPushException('DUT is not imaged properly. Host %s has '
                                'build %s, while build %s is expected.' %
                                (hostname, found_build, build))

Dan Shi

2014-05-09 15:18:15 -0700

[diff] [blame]

387

388

Shuqian Zhao

2015-08-06 09:46:22 -0700

[diff] [blame]

389

def test_suite(suite_name, expected_results, arguments, use_shard=False,
               create_and_return=False, testbed_test=False):
    """Run a suite job through run_suite and check its outcome.

    @param suite_name: Name of a suite, e.g., dummy
    @param expected_results: A dictionary of test name to test result.
    @param arguments: Arguments for run_suite command.
    @param use_shard: If true, suite is scheduled for shard board.
    @param create_and_return: If True, run_suite just creates the suite, print
                              the job id, then finish immediately.
    @param testbed_test: True to run testbed test. Default is False.
    """
    suite_job_id = do_run_suite(suite_name, arguments, use_shard,
                                create_and_return, testbed_test)

    # Confirm all DUTs used for the suite are imaged to expected build.
    # hqe.host_id for jobs running in shard is not synced back to master db,
    # therefore, skip verifying dut build for jobs running in shard.
    if testbed_test:
        build_expected = arguments.android_build
    else:
        build_expected = arguments.build
    if not (use_shard or testbed_test):
        check_dut_image(build_expected, suite_job_id)

    # Verify test results are the expected results.
    verify_test_results(suite_job_id, expected_results)

414

415

416

def verify_test_results(job_id, expected_results):
    """Verify the test results with the expected results.

    Every test view of the job is printed and matched (re.search) against the
    keys of expected_results. Three kinds of problems are collected: results
    that differ from the expected status, tests that match no expected key,
    and expected keys that matched no test (except those listed in
    IGNORE_MISSING_TESTS). Finally the link to the job's logs is loaded to
    confirm it can be opened.

    @param job_id: id of the running jobs. For suite job, it is suite_job_id.
    @param expected_results: A dictionary of test name to test result.
    @raise TestPushException: If verify fails.
    """
    print('Comparing test results...')
    test_views = site_utils.get_test_views_from_tko(job_id, TKO)

    mismatch_errors = []
    extra_test_errors = []

    found_keys = set()
    for test_name, test_status in test_views.items():
        print("%s%s" % (test_name.ljust(30), test_status))
        # platform_InstallTestImage test may exist in old builds.
        if re.search('platform_InstallTestImage_SERVER_JOB$', test_name):
            continue
        test_found = False
        # A test may match more than one expected pattern; check them all.
        for key, val in expected_results.items():
            if re.search(key, test_name):
                test_found = True
                found_keys.add(key)

                if val != test_status:
                    error = ('%s Expected: [%s], Actual: [%s]' %
                             (test_name, val, test_status))
                    mismatch_errors.append(error)
        if not test_found:
            extra_test_errors.append(test_name)

    missing_test_errors = set(expected_results) - found_keys
    missing_test_errors.difference_update(IGNORE_MISSING_TESTS)

    summary = []
    if mismatch_errors:
        summary.append(('Results of %d test(s) do not match expected '
                        'values:') % len(mismatch_errors))
        summary.extend(mismatch_errors)
        summary.append('\n')

    if extra_test_errors:
        summary.append('%d test(s) are not expected to be run:' %
                       len(extra_test_errors))
        summary.extend(extra_test_errors)
        summary.append('\n')

    if missing_test_errors:
        summary.append('%d test(s) are missing from the results:' %
                       len(missing_test_errors))
        # Sorted so the report is the same from run to run.
        summary.extend(sorted(missing_test_errors))
        summary.append('\n')

    # Test link to log can be loaded.
    job_name = '%s-%s' % (job_id, getpass.getuser())
    log_link = URL_PATTERN % (URL_HOST, job_name)
    try:
        response = urllib2.urlopen(log_link)
        try:
            response.read()
        finally:
            # Release the connection whether or not the read succeeded.
            response.close()
    except urllib2.URLError:
        summary.append('Failed to load page for link to log: %s.' % log_link)

    if summary:
        raise TestPushException('\n'.join(summary))

483

484

Dan Shi

2015-04-07 17:37:09 -0700

[diff] [blame]

485

def test_suite_wrapper(queue, suite_name, expected_results, arguments,
                       use_shard=False, create_and_return=False,
                       testbed_test=False):
    """Call test_suite and report any exception through the queue.

    Nothing is re-raised here; a failure is stored in the queue as a tuple of
    (exception type, exception value, extracted traceback entries) for the
    parent process to pick up.

    @param queue: Queue to save exception to be accessed by parent process.
    @param suite_name: Name of a suite, e.g., dummy
    @param expected_results: A dictionary of test name to test result.
    @param arguments: Arguments for run_suite command.
    @param use_shard: If true, suite is scheduled for shard board.
    @param create_and_return: If True, run_suite just creates the suite, print
                              the job id, then finish immediately.
    @param testbed_test: True to run testbed test. Default is False.
    """
    try:
        test_suite(suite_name, expected_results, arguments, use_shard,
                   create_and_return, testbed_test)
    except Exception:
        # Store the whole exc_info leads to a PicklingError.
        exc_type, exc_value, exc_tb = sys.exc_info()
        tb_entries = traceback.extract_tb(exc_tb)
        queue.put((exc_type, exc_value, tb_entries))

507

508

Dan Shi

2015-04-07 17:37:09 -0700

[diff] [blame]

509

def check_queue(queue):
    """Check the queue for any exception being raised.

    @param queue: Queue used to store exception for parent process to access.
                  Each entry is a tuple of (exception type, exception value,
                  traceback entries as returned by traceback.extract_tb).
    @raise: Any exception found in the queue.
    """
    if queue.empty():
        return
    except_type, except_value, tb_entries = queue.get()
    # Only the extracted entries are available here, so the original stack
    # trace is printed as formatted text and the exception raised separately.
    print('Original stack trace of the exception:\n%s' %
          ''.join(traceback.format_list(tb_entries)))
    if isinstance(except_value, except_type):
        # Re-raise the stored instance itself so its args/attributes survive.
        raise except_value
    raise except_type(except_value)

521

522

Shuqian Zhao

2016-10-25 13:31:06 -0700

[diff] [blame]

523

def get_head_of_repos(repos):
    """Get HEAD of updated repos, currently are autotest and chromite repos

    @param repos: a map of repo name to the path of the repo. E.g.
                  {'autotest': '/usr/local/autotest'}
    @return: a map of repo names to the current HEAD of that repo.
    """
    updated_repo_heads = {}
    for repo_name, path_to_repo in repos.items():
        # Run git inside the repo through cwd= instead of changing the working
        # directory of this whole process; no shell is needed for a fixed
        # command.
        head = subprocess.check_output(
                ['git', 'rev-parse', 'HEAD'],
                cwd=os.path.expanduser(path_to_repo)).strip()
        updated_repo_heads[repo_name] = head
    return updated_repo_heads

550

551

Shuqian Zhao

2016-11-11 16:37:36 -0800

[diff] [blame]

552

def push_prod_next_branch(updated_repo_heads):
    """Move the prod-next branch of every repo to its tested HEAD and push it.

    The push command must be ran as PUSH_USER, since only PUSH_USER has the
    right to push branches. The output of every push command (stdout and
    stderr combined) is printed.

    @param updated_repo_heads: a map of repo names to tested HEAD of that repo.
    @raise subprocess.CalledProcessError: if a push command exits with
                                          non-zero status.
    """
    # prod-next branch for every repo is downloaded under PUSH_USER home dir.
    # NOTE(review): the git steps are chained with ';', so a failing step does
    # not prevent the following ones from running -- confirm this is intended.
    git_steps = ('cd ~/{repo}; git pull; git rebase {hash} prod-next;'
                 'git push origin prod-next')
    # The user name is substituted now; the {repo}/{hash} placeholders are
    # filled in per repo below.
    sudo_template = "sudo su - %s -c '%s'" % (PUSH_USER, git_steps)

    for name, tested_head in updated_repo_heads.items():
        command = sudo_template.format(hash=tested_head, repo=name)
        print('Pushing %s prod-next branch to %s' % (name, tested_head))
        output = subprocess.check_output(command, stderr=subprocess.STDOUT,
                                         shell=True)
        print(output)

Shuqian Zhao

2017-05-30 12:56:57 -0700

[diff] [blame]

572

def _main(arguments):
    """Run the test suites and, when they all pass, push prod-next.

    Flow: record the HEAD of every repo in UPDATED_REPOS, reverify the push
    DUTs and check the DUT inventory, run the push-to-prod suite and the
    dummy suite in two child processes, poll the shared queue for exceptions
    reported by the children, then push the prod-next branches and print a
    success message.

    A completion metric is always emitted and the DUTs are always reverified
    again, whether the run succeeded or not.

    @param arguments: command line arguments. This function reads
                      continue_on_failure, num_duts and pool from it and
                      hands the whole object to the suite wrapper.
    @raise: Any exception raised while testing or pushing; it is printed,
            unfinished suite jobs are aborted unless continue_on_failure is
            set, and the exception is then re-raised.
    """

    # TODO Use chromite.lib.parallel.Manager instead, to workaround the
    # too-long-tmp-path problem.
    mpmanager = multiprocessing.Manager()

    # NOTE(review): both names are bound as locals here (there is no `global`
    # statement in this function). _run_suite_output is not read again in this
    # function and _all_suite_ids is only read in the except handler below --
    # presumably worker code is expected to fill them; verify how these lists
    # are actually shared.
    _run_suite_output = mpmanager.list()
    _all_suite_ids = mpmanager.list()

    # Capture the HEADs before testing so the hashes that get pushed and
    # reported are the ones recorded at this point.
    updated_repo_heads = get_head_of_repos(UPDATED_REPOS)
    updated_repo_msg = '\n'.join(
            ['%s: %s' % (k, v) for k, v in updated_repo_heads.iteritems()])
    # Reported by the metric in the finally clause; only flipped to True after
    # the push below has finished.
    test_push_success = False

    try:
        # Use daemon flag will kill child processes when parent process fails.
        use_daemon = not arguments.continue_on_failure
        # Verify all the DUTs at the beginning of testing push.
        reverify_all_push_duts()
        time.sleep(15) # Wait 15 secs for the verify test to start.
        check_dut_inventory(arguments.num_duts, arguments.pool)
        # Children report failures to this process through the queue.
        queue = multiprocessing.Queue()

        push_to_prod_suite = multiprocessing.Process(
                target=test_suite_wrapper,
                args=(queue, PUSH_TO_PROD_SUITE, EXPECTED_TEST_RESULTS,
                      arguments))
        push_to_prod_suite.daemon = use_daemon
        push_to_prod_suite.start()

        # suite test with --create_and_return flag
        # The two trailing True values are the wrapper's use_shard and
        # create_and_return parameters.
        asynchronous_suite = multiprocessing.Process(
                target=test_suite_wrapper,
                args=(queue, DUMMY_SUITE, EXPECTED_TEST_RESULTS_DUMMY,
                      arguments, True, True))
        # Unlike the push-to-prod suite, this process is always a daemon.
        asynchronous_suite.daemon = True
        asynchronous_suite.start()

        # Poll every 5 seconds while any child is running; check_queue raises
        # as soon as a child has reported an exception.
        while (push_to_prod_suite.is_alive()
               or asynchronous_suite.is_alive()):
            check_queue(queue)
            time.sleep(5)

        # One more check for an exception queued after the last poll.
        check_queue(queue)
        push_to_prod_suite.join()
        asynchronous_suite.join()

        # All tests pass, push prod-next branch for UPDATED_REPOS.
        push_prod_next_branch(updated_repo_heads)
        test_push_success = True
    except Exception as e:
        print 'Test for pushing to prod failed:\n'
        print str(e)
        # Abort running jobs when choose not to continue when there is failure.
        if not arguments.continue_on_failure:
            for suite_id in _all_suite_ids:
                # Only abort suites that still have unfinished jobs.
                if AFE.get_jobs(id=suite_id, finished=False):
                    AFE.run('abort_host_queue_entries', job=suite_id)
        raise
    finally:
        # Runs on success and on failure alike.
        metrics.Counter('chromeos/autotest/test_push/completed').increment(
                fields={'success': test_push_success})
        # Reverify all the hosts
        reverify_all_push_duts()

    # Only reached when no exception was raised above.
    message = ('\nAll tests completed successfully, the prod branch of the '
               'following repos is ready to be pushed to the hash list below.\n'
               '%s\n\n\nInstructions for pushing to prod are available at '
               'https://goto.google.com/autotest-to-prod ' % updated_repo_msg)
    print message

Dan Shi

2013-07-25 15:08:48 -0700

[diff] [blame]

647

648

Shuqian Zhao

2017-05-30 12:56:57 -0700

[diff] [blame]

649

def main():
    """Entry point.

    Parses the command line, then runs _main() inside the ts_mon global
    state set up for the 'test_push' service.

    @return: whatever _main() returns.
    """
    parsed_args = parse_arguments()
    ts_mon_state = ts_mon_config.SetupTsMonGlobalState(
            service_name='test_push', indirect=True)
    with ts_mon_state:
        return _main(parsed_args)

655

Dan Shi