Blame - site_utils/test_push.py - platform/external/autotest

2013-07-25 15:08:48 -0700

[diff] [blame]

#!/usr/bin/python

#

# Use of this source code is governed by a BSD-style license that can be

5

# found in the LICENSE file.

6

7

"""Tool to validate code in prod branch before pushing to lab.

8

9

The script runs push_to_prod suite to verify code in prod branch is ready to be

10

pushed. Link to design document:

11

https://docs.google.com/a/google.com/document/d/1JMz0xS3fZRSHMpFkkKAL_rxsdbNZomhHbC3B8L71uuI/edit

12

13

To verify if prod branch can be pushed to lab, run following command in

14

chromeos-autotest.cbf server:

Michael Liang

52d9f1f

2014-06-17 15:01:24 -0700

[diff] [blame]

15

/usr/local/autotest/site_utils/test_push.py -e someone@company.com

Dan Shi

2013-07-25 15:08:48 -0700

[diff] [blame]

16

Shuqian Zhao

2016-09-21 11:02:15 -0700

[diff] [blame]

17

The script uses latest gandof stable build as test build by default.

Dan Shi

2013-07-25 15:08:48 -0700

[diff] [blame]

"""

import argparse

Shuqian Zhao

2016-09-01 19:30:54 -0700

[diff] [blame]

22

import ast

Shuqian Zhao

2016-10-25 13:31:06 -0700

[diff] [blame]

23

from contextlib import contextmanager

Dan Shi

2013-07-25 15:08:48 -0700

[diff] [blame]

24

import getpass

Dan Shi

2015-04-07 17:37:09 -0700

[diff] [blame]

25

import multiprocessing

Dan Shi

2013-07-25 15:08:48 -0700

[diff] [blame]

import os

import re

import subprocess

import sys

Dan Shi

2015-04-07 17:37:09 -0700

[diff] [blame]

30

import time

31

import traceback

Dan Shi

2013-07-25 15:08:48 -0700

[diff] [blame]

32

import urllib2

33

34

import common

Dan Shi

2014-05-09 15:18:15 -0700

[diff] [blame]

35

try:

36

from autotest_lib.frontend import setup_django_environment

37

from autotest_lib.frontend.afe import models

Shuqian Zhao

2016-09-12 10:42:03 -0700

[diff] [blame]

38

from autotest_lib.frontend.afe import rpc_utils

Dan Shi

2014-05-09 15:18:15 -0700

[diff] [blame]

39

except ImportError:

40

# Unittest may not have Django database configured and will fail to import.

41

pass

Dan Shi

2015-03-26 17:54:13 -0700

[diff] [blame]

42

from autotest_lib.client.common_lib import global_config

Shuqian Zhao

2016-09-12 10:42:03 -0700

[diff] [blame]

43

from autotest_lib.client.common_lib import priorities

Dan Shi

2013-07-25 15:08:48 -0700

[diff] [blame]

44

from autotest_lib.server import site_utils

Shuqian Zhao

2016-09-12 10:42:03 -0700

[diff] [blame]

45

from autotest_lib.server import utils

Dan Shi

2014-12-22 16:25:05 -0800

[diff] [blame]

46

from autotest_lib.server.cros import provision

Dan Shi

2013-07-25 15:08:48 -0700

[diff] [blame]

47

from autotest_lib.server.cros.dynamic_suite import frontend_wrappers

Dan Shi

2015-03-26 17:54:13 -0700

[diff] [blame]

48

from autotest_lib.site_utils import gmail_lib

Dan Shi

2014-12-22 16:25:05 -0800

[diff] [blame]

49

from autotest_lib.site_utils.suite_scheduler import constants

Dan Shi

2013-07-25 15:08:48 -0700

[diff] [blame]

50

Shuqian Zhao

2016-10-25 13:31:06 -0700

[diff] [blame]

51

# Module-wide handles shared by the helpers in this file.
AUTOTEST_DIR = common.autotest_dir
CONFIG = global_config.global_config

# Frontend clients used for every AFE / TKO call made by this script.
AFE = frontend_wrappers.RetryingAFE(timeout_min=0.5, delay_sec=2)
TKO = frontend_wrappers.RetryingTKO(timeout_min=0.1, delay_sec=10)

MAIL_FROM = 'chromeos-test@google.com'

Shuqian Zhao

2016-08-31 19:23:17 -0700

[diff] [blame]

58

# Matches the version part of a build name, e.g. R36-5881.0.0.
BUILD_REGEX = r'R[\d]+-[\d]+\.[\d]+\.[\d]+'

Dan Shi

2013-07-25 15:08:48 -0700

[diff] [blame]

59

# Script (resolved next to this file by do_run_suite) used to start suites.
RUN_SUITE_COMMAND = 'run_suite.py'

# Names of the suites exercised by the push test.
PUSH_TO_PROD_SUITE = 'push_to_prod'
DUMMY_SUITE = 'dummy'
AU_SUITE = 'paygen_au_beta'
TESTBED_SUITE = 'testbed_push'
# TODO(shuqianz): Dynamically get android build after crbug.com/646068 fixed

# Default for --timeout_min, in minutes.
DEFAULT_TIMEOUT_MIN_FOR_SUITE_JOB = 30
IMAGE_BUCKET = CONFIG.get_config_value('CROS', 'image_storage_server')
DEFAULT_EMAIL = CONFIG.get_config_value(
        'SCHEDULER', 'notify_email', type=str, default='')
# Default for --num_duts; a dict literal kept as a string because
# parse_arguments() evaluates it with ast.literal_eval.
DEFAULT_NUM_DUTS = "{'gandof': 4, 'quawks': 2, 'testbed': 2}"

Dan Shi

2013-07-25 15:08:48 -0700

[diff] [blame]

70

Fang Deng

6dddf60

2014-04-17 17:01:47 -0700

[diff] [blame]

71

# Matches the run_suite output line announcing the suite job; group 1 is the
# numeric job id.
SUITE_JOB_START_INFO_REGEX = (r'^.*Created suite job:.*'
                              r'tab_id=view_job&object_id=(\d+)$')

Dan Shi

2013-07-25 15:08:48 -0700

[diff] [blame]

73

74

# Dictionaries of expected test status keyed by test name regular expression.
# Keys are applied with re.search() against the test names reported for a job.
EXPECTED_TEST_RESULTS = {
    '^SERVER_JOB$': 'GOOD',
    # This is related to dummy_Fail/control.dependency.
    'dummy_Fail.dependency$': 'TEST_NA',
    'login_LoginSuccess.*': 'GOOD',
    'provision_AutoUpdate.double': 'GOOD',
    'dummy_Pass.*': 'GOOD',
    'dummy_Fail.Fail$': 'FAIL',
    'dummy_Fail.RetryFail$': 'FAIL',
    'dummy_Fail.RetrySuccess': 'GOOD',
    'dummy_Fail.Error$': 'ERROR',
    'dummy_Fail.Warn$': 'WARN',
    'dummy_Fail.NAError$': 'TEST_NA',
    'dummy_Fail.Crash$': 'GOOD',
}

EXPECTED_TEST_RESULTS_DUMMY = {
    '^SERVER_JOB$': 'GOOD',
    'dummy_Pass.*': 'GOOD',
    'dummy_Fail.Fail': 'FAIL',
    'dummy_Fail.Warn': 'WARN',
    'dummy_Fail.Crash': 'GOOD',
    'dummy_Fail.Error': 'ERROR',
    'dummy_Fail.NAError': 'TEST_NA',
}

EXPECTED_TEST_RESULTS_AU = {
    'SERVER_JOB$': 'GOOD',
    'autoupdate_EndToEndTest.paygen_au_beta_delta.*': 'GOOD',
    'autoupdate_EndToEndTest.paygen_au_beta_full.*': 'GOOD',
}

EXPECTED_TEST_RESULTS_TESTBED = {
    '^SERVER_JOB$': 'GOOD',
    'testbed_DummyTest': 'GOOD',
}

EXPECTED_TEST_RESULTS_POWERWASH = {
    'platform_Powerwash': 'GOOD',
    'SERVER_JOB': 'GOOD',
}

108

Dan Shi

2013-07-25 15:08:48 -0700

[diff] [blame]

109

# Pieces used to build the link to a job's logs; URL_PATTERN is filled in
# with (URL_HOST, job_name) by verify_test_results().
URL_HOST = CONFIG.get_config_value('SERVER', 'hostname', type=str)
URL_PATTERN = CONFIG.get_config_value('CROS', 'log_url_pattern', type=str)

111

Dan Shi

dc9eb17

2014-12-09 16:05:02 -0800

[diff] [blame]

112

# Some tests could be missing from the test results for various reasons. Add
# such tests to this list and explain the reason.
# NOTE(review): verify_test_results() removes these entries from the set of
# missing expected-result keys by exact string equality, so 'login_LoginSuccess'
# does not cancel the 'login_LoginSuccess.*' key of EXPECTED_TEST_RESULTS --
# confirm whether that is intended.
IGNORE_MISSING_TESTS = [
    # For latest build, npo_test_delta does not exist.
    'autoupdate_EndToEndTest.npo_test_delta.*',
    # For trybot build, nmo_test_delta does not exist.
    'autoupdate_EndToEndTest.nmo_test_delta.*',
    # Older build does not have login_LoginSuccess test in push_to_prod suite.
    # TODO(dshi): Remove following lines after R41 is stable.
    'login_LoginSuccess',
]

122

Dan Shi

2013-07-25 15:08:48 -0700

[diff] [blame]

123

# Save all run_suite command output. The lists are created through a
# multiprocessing manager so they can be appended to from the per-suite
# worker processes.
manager = multiprocessing.Manager()
run_suite_output = manager.list()
# Ids of every suite job started by do_run_suite().
all_suite_ids = manager.list()

# A dict maps the name of the updated repos and the path of them.
UPDATED_REPOS = {
    'autotest': AUTOTEST_DIR,
    'chromite': '%s/site-packages/chromite/' % AUTOTEST_DIR,
}
# Account used by push_prod_next_branch() to run the git push.
PUSH_USER = 'chromeos-test-lab'

Dan Shi

2013-07-25 15:08:48 -0700

[diff] [blame]

131

132

class TestPushException(Exception):
    """Exception to be raised when the test to push to prod failed."""

135

Dan Shi

2014-05-09 13:47:00 -0700

[diff] [blame]

136

Shuqian Zhao

2016-09-01 19:30:54 -0700

[diff] [blame]

137

def check_dut_inventory(required_num_duts):
    """Check DUT inventory for each board.

    Queries the AFE for unlocked hosts in Ready status, counts them per
    platform and compares the counts with the requirement.

    @param required_num_duts: a dict specifying the number of DUT each platform
                              requires in order to finish push tests.
    @raise TestPushException: if number of DUTs are less than the requirement.
    """
    hosts = AFE.run('get_hosts', status='Ready', locked=False)

    # Count Ready hosts per platform in a single pass.
    current_inventory = {}
    for host in hosts:
        platform = host['platform']
        current_inventory[platform] = current_inventory.get(platform, 0) + 1

    shortages = []
    for platform, req_num in required_num_duts.items():
        curr_num = current_inventory.get(platform, 0)
        if curr_num < req_num:
            shortages.append('\nRequire %d %s DUTs, only %d are Ready now' %
                             (req_num, platform, curr_num))

    if shortages:
        raise TestPushException('Not enough DUTs to run push tests. %s' %
                                ''.join(shortages))

Shuqian Zhao

2016-09-12 10:42:03 -0700

[diff] [blame]

158

def powerwash_dut_to_test_repair(hostname, timeout):
    """Powerwash dut to test repair workflow.

    Creates a platform_Powerwash server job on the host, waits for TKO to
    report test statuses for it, checks them against
    EXPECTED_TEST_RESULTS_POWERWASH and finally asks the AFE to reverify the
    host.

    @param hostname: hostname of the dut.
    @param timeout: seconds of the powerwash test to hit timeout.
    @raise TestPushException: if DUT fail to run the test.
    """
    powerwash_test = models.Test.objects.get(name='platform_Powerwash')
    control_file = utils.read_file(
            os.path.join(common.autotest_dir, powerwash_test.path))
    job_id = rpc_utils.create_job_common(
            'powerwash', priority=priorities.Priority.SUPER,
            control_type='Server', control_file=control_file,
            hosts=[hostname])

    # Poll TKO every 10 seconds until results show up or the deadline passes.
    end = time.time() + timeout
    while not TKO.get_job_test_statuses_from_db(job_id):
        if time.time() >= end:
            AFE.run('abort_host_queue_entries', job=job_id)
            raise TestPushException(
                    'Powerwash test on %s timeout after %ds, abort it.' %
                    (hostname, timeout))
        time.sleep(10)

    verify_test_results(job_id, EXPECTED_TEST_RESULTS_POWERWASH)
    # Kick off verify, verify will fail and a repair should be triggered.
    AFE.reverify_hosts(hostnames=[hostname])

Kevin Cheng

2015-12-11 09:45:57 -0800

[diff] [blame]

182

183

Shuqian Zhao

d2a99f0

2016-09-22 13:31:30 -0700

[diff] [blame]

184

def reverify_all_push_duts(pool):
    """Reverify all the push DUTs.

    Looks up every host carrying the pool label and asks the AFE to
    reverify them.

    @param pool: Name of the pool used by test_push.
    """
    pool_label = constants.Labels.POOL_PREFIX + pool
    hostnames = [host.hostname for host in AFE.get_hosts(label=pool_label)]
    AFE.reverify_hosts(hostnames=hostnames)

192

193

Shuqian Zhao

2016-08-31 19:23:17 -0700

[diff] [blame]

194

def get_default_build(board='gandof'):
    """Get the default build to be used for test.

    Asks `atest stable_version list` for the board's stable version. When
    the command fails or its output contains no version matching
    BUILD_REGEX, falls back to the `stable_cros_version` config value.

    @param board: Name of board to be tested, default is gandof.
    @return: Build to be tested, e.g., gandof-release/R36-5881.0.0
    """
    # An argument list (no shell) keeps the board name from being interpreted
    # by a shell.
    cmd = [os.path.join(AUTOTEST_DIR, 'cli', 'atest'),
           'stable_version', 'list', '--board=%s' % board, '-w', 'cautotest']
    try:
        result = subprocess.check_output(cmd).strip()
    except (subprocess.CalledProcessError, OSError):
        # A failing atest must not prevent the config fallback below.
        result = ''

    match = re.search(BUILD_REGEX, result) if result else None
    if match:
        return '%s-release/%s' % (board, match.group(0))

    # If fail to get stable version from cautotest, use that defined in config
    build = CONFIG.get_config_value('CROS', 'stable_cros_version')
    return '%s-release/%s' % (board, build)

211

Dan Shi

2013-07-25 15:08:48 -0700

[diff] [blame]

212

def parse_arguments():
    """Parse arguments for test_push tool.

    Besides plain parsing, this fills in the default builds (the stable build
    of the given boards) and converts --num_duts from its string form into a
    dict.

    @return: Parsed arguments.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-b', '--board', dest='board', default='gandof',
                        help='Default is gandof.')
    parser.add_argument('-sb', '--shard_board', dest='shard_board',
                        default='quawks',
                        help='Default is quawks.')
    parser.add_argument('-i', '--build', dest='build', default=None,
                        help='Default is the latest stable build of given '
                             'board. Must be a stable build, otherwise AU test '
                             'will fail. (ex: gandof-release/R54-8743.25.0)')
    parser.add_argument('-si', '--shard_build', dest='shard_build',
                        default=None,
                        help='Default is the latest stable build of given '
                             'board. Must be a stable build, otherwise AU test '
                             'will fail.')
    parser.add_argument('-ab', '--android_board', dest='android_board',
                        default='shamu-2', help='Android board to test.')
    parser.add_argument('-ai', '--android_build', dest='android_build',
                        help='Android build to test.')
    parser.add_argument('-p', '--pool', dest='pool', default='bvt')
    parser.add_argument('-u', '--num', dest='num', type=int, default=3,
                        help='Run on at most NUM machines.')
    parser.add_argument('-e', '--email', dest='email', default=DEFAULT_EMAIL,
                        help='Email address for the notification to be sent to '
                             'after the script finished running.')
    parser.add_argument('-t', '--timeout_min', dest='timeout_min', type=int,
                        default=DEFAULT_TIMEOUT_MIN_FOR_SUITE_JOB,
                        help='Time in mins to wait before abort the jobs we '
                             'are waiting on. Only for the asynchronous suites '
                             'triggered by create_and_return flag.')
    parser.add_argument('-ud', '--num_duts', dest='num_duts',
                        default=DEFAULT_NUM_DUTS,
                        help="String of dict that indicates the required number"
                             " of DUTs for each board. E.g {'gandof':4}")
    parser.add_argument('-c', '--continue_on_failure', action='store_true',
                        dest='continue_on_failure',
                        help='All tests continue to run when there is failure')

    arguments = parser.parse_args(sys.argv[1:])

    # Validate --num_duts before doing any slower work so that a malformed
    # value produces a usage error instead of a traceback.
    try:
        num_duts = ast.literal_eval(arguments.num_duts)
    except (ValueError, SyntaxError):
        num_duts = None
    if not isinstance(num_duts, dict):
        parser.error('--num_duts must be the string form of a dict, '
                     "e.g. {'gandof': 4}; got: %r" % (arguments.num_duts,))
    arguments.num_duts = num_duts

    # Get latest stable build as default build.
    if not arguments.build:
        arguments.build = get_default_build(arguments.board)
    if not arguments.shard_build:
        arguments.shard_build = get_default_build(arguments.shard_board)

    return arguments

Shuqian Zhao

2015-08-06 09:46:22 -0700

[diff] [blame]

268

def do_run_suite(suite_name, arguments, use_shard=False,
                 create_and_return=False, testbed_test=False):
    """Call run_suite to run a suite job, and return the suite job id.

    The script waits the suite job to finish before returning the suite job id.
    Also it will echo the run_suite output to stdout.

    @param suite_name: Name of a suite, e.g., dummy.
    @param arguments: Arguments for run_suite command.
    @param use_shard: If true, suite is scheduled for shard board.
    @param create_and_return: If True, run_suite just creates the suite, print
                              the job id, then finish immediately.
    @param testbed_test: True to run testbed test. Default is False.

    @return: Suite job ID.
    @raise TestPushException: If the suite job id cannot be found in the
                              run_suite output, or an asynchronous suite does
                              not finish within arguments.timeout_min.
    """
    # testbed_test takes precedence over use_shard when picking board/build.
    if testbed_test:
        board = arguments.android_board
        build = arguments.android_build
    elif use_shard:
        board = arguments.shard_board
        build = arguments.shard_build
    else:
        board = arguments.board
        build = arguments.build

    # Remove cros-version label to force provision.
    hosts = AFE.get_hosts(label=constants.Labels.BOARD_PREFIX + board,
                          locked=False)
    for host in hosts:
        labels_to_remove = [
                label for label in host.labels
                if (label.startswith(provision.CROS_VERSION_PREFIX) or
                    label.startswith(provision.TESTBED_BUILD_VERSION_PREFIX))]
        if labels_to_remove:
            AFE.run('host_remove_labels', id=host.id, labels=labels_to_remove)

        # Test repair work flow on shards, powerwash test will timeout after 7m.
        # NOTE(review): the original indentation was lost; this call is kept
        # inside the per-host loop because it depends on `host` -- confirm.
        if use_shard and not create_and_return:
            powerwash_dut_to_test_repair(host.hostname, timeout=420)

    current_dir = os.path.dirname(os.path.realpath(__file__))
    cmd = [os.path.join(current_dir, RUN_SUITE_COMMAND),
           '-s', suite_name,
           '-b', board,
           '-i', build,
           '-p', arguments.pool,
           '-u', str(arguments.num)]
    if create_and_return:
        cmd += ['-c']
    if testbed_test:
        cmd += ['--run_prod_code']

    suite_job_id = None

    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                            stderr=subprocess.STDOUT)

    while True:
        line = proc.stdout.readline()

        # Break when run_suite process completed.
        if not line and proc.poll() is not None:
            break
        stripped = line.rstrip()
        print(stripped)
        run_suite_output.append(stripped)

        # Only the first line announcing the suite job is of interest.
        if not suite_job_id:
            m = re.match(SUITE_JOB_START_INFO_REGEX, line)
            if m and m.group(1):
                suite_job_id = int(m.group(1))
                all_suite_ids.append(suite_job_id)

    if not suite_job_id:
        raise TestPushException('Failed to retrieve suite job ID.')

    # If create_and_return specified, wait for the suite to finish.
    if create_and_return:
        end = time.time() + arguments.timeout_min * 60
        while not AFE.get_jobs(id=suite_job_id, finished=True):
            if time.time() >= end:
                AFE.run('abort_host_queue_entries', job=suite_job_id)
                raise TestPushException(
                        'Asynchronous suite triggered by create_and_return '
                        'flag has timed out after %d mins. Aborting it.' %
                        arguments.timeout_min)
            time.sleep(10)

    print('Suite job %s is completed.' % suite_job_id)
    return suite_job_id

Dan Shi

2014-05-09 15:18:15 -0700

[diff] [blame]

362

def check_dut_image(build, suite_job_id):
    """Confirm all DUTs used for the suite are imaged to expected build.

    Collects the host of the first host queue entry of every child job of
    the suite and compares the build the AFE reports for it with `build`.

    @param build: Expected build to be imaged.
    @param suite_job_id: job ID of the suite job.
    @raise TestPushException: If a DUT does not have expected build imaged.
    """
    print('Checking image installed in DUTs...')
    hostnames = set()
    for job in models.Job.objects.filter(parent_job_id=suite_job_id):
        hqe = models.HostQueueEntry.objects.filter(job_id=job.id)[0]
        hostnames.add(hqe.host.hostname)

    for hostname in hostnames:
        found_build = site_utils.get_build_from_afe(hostname, AFE)
        if found_build != build:
            raise TestPushException('DUT is not imaged properly. Host %s has '
                                    'build %s, while build %s is expected.' %
                                    (hostname, found_build, build))

381

382

Shuqian Zhao

2015-08-06 09:46:22 -0700

[diff] [blame]

383

def test_suite(suite_name, expected_results, arguments, use_shard=False,
               create_and_return=False, testbed_test=False):
    """Call run_suite to start a suite job and verify results.

    @param suite_name: Name of a suite, e.g., dummy
    @param expected_results: A dictionary of test name to test result.
    @param arguments: Arguments for run_suite command.
    @param use_shard: If true, suite is scheduled for shard board.
    @param create_and_return: If True, run_suite just creates the suite, print
                              the job id, then finish immediately.
    @param testbed_test: True to run testbed test. Default is False.
    """
    suite_job_id = do_run_suite(suite_name, arguments, use_shard,
                                create_and_return, testbed_test)

    # Confirm all DUTs used for the suite are imaged to expected build.
    # hqe.host_id for jobs running in shard is not synced back to master db,
    # therefore, skip verifying dut build for jobs running in shard.
    # The check never runs for testbed tests, so the expected build is always
    # arguments.build here.
    if suite_name != AU_SUITE and not use_shard and not testbed_test:
        check_dut_image(arguments.build, suite_job_id)

    # Verify test results are the expected results.
    verify_test_results(suite_job_id, expected_results)

408

409

410

def verify_test_results(job_id, expected_results):
    """Verify the test results with the expected results.

    Each reported test name is matched with re.search() against every key of
    `expected_results`. Three kinds of problems are collected: status
    mismatches, tests that match no key, and keys that matched no test
    (minus the entries of IGNORE_MISSING_TESTS). The link to the job's logs
    is also fetched to confirm it loads.

    @param job_id: id of the running jobs. For suite job, it is suite_job_id.
    @param expected_results: A dictionary of test name to test result.
    @raise TestPushException: If verify fails.
    """
    print('Comparing test results...')
    test_views = site_utils.get_test_views_from_tko(job_id, TKO)

    mismatch_errors = []
    extra_test_errors = []

    found_keys = set()
    for test_name, test_status in test_views.items():
        print("%s%s" % (test_name.ljust(30), test_status))
        # platform_InstallTestImage test may exist in old builds.
        if re.search('platform_InstallTestImage_SERVER_JOB$', test_name):
            continue
        test_found = False
        for key, val in expected_results.items():
            if re.search(key, test_name):
                test_found = True
                found_keys.add(key)
                if val != test_status:
                    error = ('%s Expected: [%s], Actual: [%s]' %
                             (test_name, val, test_status))
                    mismatch_errors.append(error)
        if not test_found:
            extra_test_errors.append(test_name)

    # Expected keys nobody matched, except those explicitly allowed to be
    # missing. Sorted so the report is stable from run to run.
    missing_test_errors = sorted(
            set(expected_results) - found_keys - set(IGNORE_MISSING_TESTS))

    summary = []
    if mismatch_errors:
        summary.append(('Results of %d test(s) do not match expected '
                        'values:') % len(mismatch_errors))
        summary.extend(mismatch_errors)
        summary.append('\n')

    if extra_test_errors:
        summary.append('%d test(s) are not expected to be run:' %
                       len(extra_test_errors))
        summary.extend(extra_test_errors)
        summary.append('\n')

    if missing_test_errors:
        summary.append('%d test(s) are missing from the results:' %
                       len(missing_test_errors))
        summary.extend(missing_test_errors)
        summary.append('\n')

    # Test link to log can be loaded.
    job_name = '%s-%s' % (job_id, getpass.getuser())
    log_link = URL_PATTERN % (URL_HOST, job_name)
    try:
        urllib2.urlopen(log_link).read()
    except urllib2.URLError:
        summary.append('Failed to load page for link to log: %s.' % log_link)

    if summary:
        raise TestPushException('\n'.join(summary))

477

478

Dan Shi

2015-04-07 17:37:09 -0700

[diff] [blame]

479

def test_suite_wrapper(queue, suite_name, expected_results, arguments,
                       use_shard=False, create_and_return=False,
                       testbed_test=False):
    """Wrapper to call test_suite. Handle exception and pipe it to parent
    process.

    @param queue: Queue to save exception to be accessed by parent process.
    @param suite_name: Name of a suite, e.g., dummy
    @param expected_results: A dictionary of test name to test result.
    @param arguments: Arguments for run_suite command.
    @param use_shard: If true, suite is scheduled for shard board.
    @param create_and_return: If True, run_suite just creates the suite, print
                              the job id, then finish immediately.
    @param testbed_test: True to run testbed test. Default is False.
    """
    try:
        test_suite(suite_name, expected_results, arguments, use_shard,
                   create_and_return, testbed_test)
    except:  # pylint: disable=bare-except
        # Deliberately catch everything: whatever went wrong is handed to the
        # parent through the queue rather than raised here.
        # Store the whole exc_info leads to a PicklingError, so only the
        # extracted traceback entries are queued.
        except_type, except_value, tb = sys.exc_info()
        queue.put((except_type, except_value, traceback.extract_tb(tb)))

501

502

Dan Shi

2015-04-07 17:37:09 -0700

[diff] [blame]

503

def check_queue(queue):
    """Check the queue for any exception being raised.

    Items are expected in the form queued by test_suite_wrapper:
    (exception type, exception value, extracted traceback entries).

    @param queue: Queue used to store exception for parent process to access.
    @raise: Any exception found in the queue.
    """
    if queue.empty():
        return
    except_type, except_value, stack = queue.get()
    # Print the original backtrace, then re-raise the same type of exception.
    # The entries are rendered as text; a bare %s would print the list repr.
    print('Original stack trace of the exception:\n%s' %
          ''.join(traceback.format_list(stack)))
    raise except_type(except_value)

515

516

Shuqian Zhao

2016-10-25 13:31:06 -0700

[diff] [blame]

517

def get_head_of_repos(repos):
    """Get HEAD of updated repos, currently are autotest and chromite repos

    Runs `git rev-parse HEAD` with each repo path as the working directory.
    The path is passed to the child process via `cwd`, so this process's own
    working directory is never changed.

    @param repos: a map of repo name to the path of the repo. E.g.
                  {'autotest': '/usr/local/autotest'}
    @return: a map of repo names to the current HEAD of that repo.
    """
    updated_repo_heads = {}
    for repo_name, path_to_repo in repos.items():
        head = subprocess.check_output(
                ['git', 'rev-parse', 'HEAD'],
                cwd=os.path.expanduser(path_to_repo)).strip()
        updated_repo_heads[repo_name] = head
    return updated_repo_heads

544

545

Shuqian Zhao

2016-11-11 16:37:36 -0800

[diff] [blame]

546

def push_prod_next_branch(updated_repo_heads):
    """Push prod-next branch to the tested HEAD after all tests pass.

    The push command must be ran as PUSH_USER, since only PUSH_USER has the
    right to push branches.

    @param updated_repo_heads: a map of repo names to tested HEAD of that repo.
    @raise subprocess.CalledProcessError: If changing directory, rebasing or
            pushing fails for any repo.
    """
    # prod-next branch for every repo is downloaded under PUSH_USER home dir.
    # The steps are chained with '&&' so that a failed cd or rebase stops the
    # sequence and surfaces as a non-zero exit status, instead of the push
    # running anyway and masking the failure.
    cmd = ('cd ~/{repo} && git rebase {hash} prod-next && '
           'git push origin prod-next')
    run_push_as_push_user = "sudo su - %s -c '%s'" % (PUSH_USER, cmd)

    for repo_name, test_hash in updated_repo_heads.items():
        push_cmd = run_push_as_push_user.format(hash=test_hash, repo=repo_name)
        print('Pushing %s prod-next branch to %s' % (repo_name, test_hash))
        # stderr is folded into stdout so git's messages show up in the log.
        print(subprocess.check_output(push_cmd, stderr=subprocess.STDOUT,
                                      shell=True))

Dan Shi

2013-07-25 15:08:48 -0700

[diff] [blame]

565

def main():
    """Entry point for test_push script.

    Records the HEAD of every repo in UPDATED_REPOS, then runs the
    push_to_prod, AU, dummy (create_and_return) and testbed suites in
    separate processes. While any of them is alive, the shared queue is
    polled every 5 seconds and any exception reported by a child is
    re-raised here.

    On failure the error is printed, unfinished suite jobs are aborted
    (unless continue_on_failure is set), a failure email is sent when an
    address was given, and the exception is re-raised. The push DUTs are
    reverified in every case. On success a summary is printed and, when an
    address was given, emailed.
    """
    arguments = parse_arguments()
    updated_repo_heads = get_head_of_repos(UPDATED_REPOS)
    head_lines = []
    for repo, head in updated_repo_heads.iteritems():
        head_lines.append('%s: %s' % (repo, head))
    updated_repo_msg = '\n'.join(head_lines)

    try:
        # Use daemon flag will kill child processes when parent process fails.
        use_daemon = not arguments.continue_on_failure
        check_dut_inventory(arguments.num_duts)
        queue = multiprocessing.Queue()

        def start_suite(daemon, *wrapper_args):
            """Start test_suite_wrapper in a child process and return it.

            @param daemon: Value for the daemon flag of the child process.
            @param wrapper_args: Arguments passed to test_suite_wrapper after
                                 the shared queue.
            """
            proc = multiprocessing.Process(target=test_suite_wrapper,
                                           args=(queue,) + wrapper_args)
            proc.daemon = daemon
            proc.start()
            return proc

        push_to_prod_suite = start_suite(
                use_daemon, PUSH_TO_PROD_SUITE, EXPECTED_TEST_RESULTS,
                arguments)

        # TODO(dshi): Remove following line after crbug.com/267644 is fixed.
        # Also, merge EXPECTED_TEST_RESULTS_AU to EXPECTED_TEST_RESULTS
        # AU suite will be on shard until crbug.com/634049 is fixed.
        au_suite = start_suite(
                use_daemon, AU_SUITE, EXPECTED_TEST_RESULTS_AU, arguments,
                True)

        # suite test with --create_and_return flag; always a daemon process.
        asynchronous_suite = start_suite(
                True, DUMMY_SUITE, EXPECTED_TEST_RESULTS_DUMMY, arguments,
                False, True)

        # Test suite for testbed
        testbed_suite = start_suite(
                use_daemon, TESTBED_SUITE, EXPECTED_TEST_RESULTS_TESTBED,
                arguments, False, False, True)

        suites = (push_to_prod_suite, au_suite, asynchronous_suite,
                  testbed_suite)
        # Poll for exceptions reported by the children until all have exited.
        while any(proc.is_alive() for proc in suites):
            check_queue(queue)
            time.sleep(5)

        # Catch an exception that was queued right before the last exit.
        check_queue(queue)

        for proc in suites:
            proc.join()

        # All tests pass, push prod-next branch for UPDATED_REPOS.
        #push_prod_next_branch(updated_repo_heads)
    except Exception as e:
        print('Test for pushing to prod failed:\n')
        print(str(e))
        # Abort running jobs when choose not to continue when there is failure.
        if not arguments.continue_on_failure:
            for suite_id in all_suite_ids:
                if AFE.get_jobs(id=suite_id, finished=False):
                    AFE.run('abort_host_queue_entries', job=suite_id)
        # Send out email about the test failure.
        if arguments.email:
            failure_template = (
                    'Test CLs of the following repos failed. Below are the '
                    'repos and the corresponding test HEAD.\n\n%s\n\n.'
                    'Error occurred during test:\n\n%s\n\n')
            failure_body = failure_template % (updated_repo_msg, str(e))
            failure_body += '\n'.join(run_suite_output)
            gmail_lib.send_email(
                    arguments.email,
                    'Test for pushing to prod failed. Do NOT push!',
                    failure_body)
        raise
    finally:
        # Reverify all the hosts
        reverify_all_push_duts(arguments.pool)

    message = ('\nAll tests are completed successfully, the prod branch of the '
               'following repos ready to be pushed to the hash list below.\n'
               '%s' % updated_repo_msg)
    print(message)
    # Send out email about test completed successfully.
    if arguments.email:
        gmail_lib.send_email(
                arguments.email,
                'Test for pushing to prod completed successfully',
                message)

Dan Shi