Blame - site_utils/test_push.py - platform/external/autotest

2013-07-25 15:08:48 -0700

[diff] [blame]

#!/usr/bin/python

#

# Use of this source code is governed by a BSD-style license that can be

5

# found in the LICENSE file.

6

7

"""Tool to validate code in prod branch before pushing to lab.

8

9

The script runs push_to_prod suite to verify code in prod branch is ready to be

10

pushed. Link to design document:

11

https://docs.google.com/a/google.com/document/d/1JMz0xS3fZRSHMpFkkKAL_rxsdbNZomhHbC3B8L71uuI/edit

12

13

To verify if prod branch can be pushed to lab, run following command in

14

chromeos-autotest.cbf server:

Michael Liang

52d9f1f

2014-06-17 15:01:24 -0700

[diff] [blame]

15

/usr/local/autotest/site_utils/test_push.py -e someone@company.com

Dan Shi

2013-07-25 15:08:48 -0700

[diff] [blame]

16

Shuqian Zhao

2016-09-21 11:02:15 -0700

[diff] [blame]

17

The script uses latest gandof stable build as test build by default.

Dan Shi

2013-07-25 15:08:48 -0700

[diff] [blame]

"""

import argparse

Shuqian Zhao

2016-09-01 19:30:54 -0700

[diff] [blame]

22

import ast

Shuqian Zhao

2016-10-25 13:31:06 -0700

[diff] [blame]

23

from contextlib import contextmanager

Dan Shi

2013-07-25 15:08:48 -0700

[diff] [blame]

24

import getpass

Dan Shi

2015-04-07 17:37:09 -0700

[diff] [blame]

25

import multiprocessing

Dan Shi

2013-07-25 15:08:48 -0700

[diff] [blame]

import os

import re

import subprocess

import sys

Dan Shi

2015-04-07 17:37:09 -0700

[diff] [blame]

30

import time

31

import traceback

Dan Shi

2013-07-25 15:08:48 -0700

[diff] [blame]

32

import urllib2

33

34

import common

Dan Shi

2014-05-09 15:18:15 -0700

[diff] [blame]

35

try:

36

from autotest_lib.frontend import setup_django_environment

37

from autotest_lib.frontend.afe import models

Shuqian Zhao

2016-09-12 10:42:03 -0700

[diff] [blame]

38

from autotest_lib.frontend.afe import rpc_utils

Dan Shi

2014-05-09 15:18:15 -0700

[diff] [blame]

39

except ImportError:

40

# Unittest may not have Django database configured and will fail to import.

41

pass

Dan Shi

2015-03-26 17:54:13 -0700

[diff] [blame]

42

from autotest_lib.client.common_lib import global_config

Shuqian Zhao

2016-09-12 10:42:03 -0700

[diff] [blame]

43

from autotest_lib.client.common_lib import priorities

Shuqian Zhao

2016-12-11 19:10:36 -0800

[diff] [blame]

44

from autotest_lib.client.common_lib.cros import retry

Dan Shi

2013-07-25 15:08:48 -0700

[diff] [blame]

45

from autotest_lib.server import site_utils

Shuqian Zhao

2016-09-12 10:42:03 -0700

[diff] [blame]

46

from autotest_lib.server import utils

Dan Shi

2014-12-22 16:25:05 -0800

[diff] [blame]

47

from autotest_lib.server.cros import provision

Dan Shi

2013-07-25 15:08:48 -0700

[diff] [blame]

48

from autotest_lib.server.cros.dynamic_suite import frontend_wrappers

Dan Shi

2015-03-26 17:54:13 -0700

[diff] [blame]

49

from autotest_lib.site_utils import gmail_lib

Dan Shi

2014-12-22 16:25:05 -0800

[diff] [blame]

50

from autotest_lib.site_utils.suite_scheduler import constants

Dan Shi

2013-07-25 15:08:48 -0700

[diff] [blame]

51

Shuqian Zhao

2016-10-25 13:31:06 -0700

[diff] [blame]

52

AUTOTEST_DIR=common.autotest_dir

Dan Shi

2013-07-25 15:08:48 -0700

[diff] [blame]

53

CONFIG = global_config.global_config

54

Dan Shi

2016-02-03 11:37:02 -0800

[diff] [blame]

55

AFE = frontend_wrappers.RetryingAFE(timeout_min=0.5, delay_sec=2)

Shuqian Zhao

2016-09-12 10:42:03 -0700

[diff] [blame]

56

TKO = frontend_wrappers.RetryingTKO(timeout_min=0.1, delay_sec=10)

Dan Shi

2016-02-03 11:37:02 -0800

[diff] [blame]

57

Dan Shi

2013-07-25 15:08:48 -0700

[diff] [blame]

58

MAIL_FROM = 'chromeos-test@google.com'

Shuqian Zhao

2016-08-31 19:23:17 -0700

[diff] [blame]

59

# Matches a build version string such as R36-5881.0.0. Written as a raw string
# so the regex escapes are not treated as string escapes.
BUILD_REGEX = r'R[\d]+-[\d]+\.[\d]+\.[\d]+'

Dan Shi

2013-07-25 15:08:48 -0700

[diff] [blame]

60

RUN_SUITE_COMMAND = 'run_suite.py'

61

PUSH_TO_PROD_SUITE = 'push_to_prod'

Jakob Juelich

2014-10-10 14:08:05 -0700

[diff] [blame]

62

DUMMY_SUITE = 'dummy'

Shuqian Zhao

2016-09-21 11:02:15 -0700

[diff] [blame]

63

AU_SUITE = 'paygen_au_beta'

Dan Shi

2016-09-09 13:58:31 -0700

[diff] [blame]

64

TESTBED_SUITE = 'testbed_push'

Shuqian Zhao

2016-09-22 14:26:18 -0700

[diff] [blame]

65

# TODO(shuqianz): Dynamically get android build after crbug.com/646068 fixed

xixuan

2d66858

2016-06-10 14:02:32 -0700

[diff] [blame]

66

DEFAULT_TIMEOUT_MIN_FOR_SUITE_JOB = 30

Shuqian Zhao

2016-08-31 19:23:17 -0700

[diff] [blame]

67

IMAGE_BUCKET = CONFIG.get_config_value('CROS', 'image_storage_server')

Shuqian Zhao

2016-09-22 14:26:18 -0700

[diff] [blame]

68

DEFAULT_EMAIL = CONFIG.get_config_value(

69

'SCHEDULER', 'notify_email', type=str, default='')

Shuqian Zhao

671c65f

2016-11-18 15:15:53 -0800

[diff] [blame]

70

DEFAULT_NUM_DUTS = "{'gandof': 4, 'quawks': 2, 'testbed': 1}"

Dan Shi

2013-07-25 15:08:48 -0700

[diff] [blame]

71

Fang Deng

6dddf60

2014-04-17 17:01:47 -0700

[diff] [blame]

72

# Matches the run_suite output line that announces the created suite job.
# Group 1 captures the digits of the trailing object_id parameter. Raw strings
# keep the regex escapes from being treated as string escapes.
SUITE_JOB_START_INFO_REGEX = (r'^.*Created suite job:.*'
                              r'tab_id=view_job&object_id=(\d+)$')

Dan Shi

2013-07-25 15:08:48 -0700

[diff] [blame]

74

75

# Dictionary of test results keyed by test name regular expression.

76

EXPECTED_TEST_RESULTS = {'^SERVER_JOB$': 'GOOD',

77

# This is related to dummy_Fail/control.dependency.

78

'dummy_Fail.dependency$': 'TEST_NA',

Dan Shi

dc9eb17

2014-12-09 16:05:02 -0800

[diff] [blame]

79

'login_LoginSuccess.*': 'GOOD',

Dan Shi

2014-12-22 16:25:05 -0800

[diff] [blame]

80

'provision_AutoUpdate.double': 'GOOD',

Dan Shi

2013-07-25 15:08:48 -0700

[diff] [blame]

81

'dummy_Pass.*': 'GOOD',

82

'dummy_Fail.Fail$': 'FAIL',

83

'dummy_Fail.RetryFail$': 'FAIL',

84

'dummy_Fail.RetrySuccess': 'GOOD',

85

'dummy_Fail.Error$': 'ERROR',

86

'dummy_Fail.Warn$': 'WARN',

87

'dummy_Fail.NAError$': 'TEST_NA',

88

'dummy_Fail.Crash$': 'GOOD',

89

}

90

Jakob Juelich

2014-10-10 14:08:05 -0700

[diff] [blame]

91

EXPECTED_TEST_RESULTS_DUMMY = {'^SERVER_JOB$': 'GOOD',

92

'dummy_Pass.*': 'GOOD',

93

'dummy_Fail.Fail': 'FAIL',

94

'dummy_Fail.Warn': 'WARN',

95

'dummy_Fail.Crash': 'GOOD',

96

'dummy_Fail.Error': 'ERROR',

97

'dummy_Fail.NAError': 'TEST_NA',}

98

Dan Shi

2013-07-25 15:08:48 -0700

[diff] [blame]

99

EXPECTED_TEST_RESULTS_AU = {'SERVER_JOB$': 'GOOD',

Shuqian Zhao

2016-09-21 11:02:15 -0700

[diff] [blame]

100

'autoupdate_EndToEndTest.paygen_au_beta_delta.*': 'GOOD',

101

'autoupdate_EndToEndTest.paygen_au_beta_full.*': 'GOOD',

Dan Shi

2013-07-25 15:08:48 -0700

[diff] [blame]

102

}

103

Dan Shi

2016-09-09 13:58:31 -0700

[diff] [blame]

104

EXPECTED_TEST_RESULTS_TESTBED = {'^SERVER_JOB$': 'GOOD',

105

'testbed_DummyTest': 'GOOD',}

106

Shuqian Zhao

2016-09-12 10:42:03 -0700

[diff] [blame]

107

EXPECTED_TEST_RESULTS_POWERWASH = {'platform_Powerwash': 'GOOD',

108

'SERVER_JOB': 'GOOD'}

109

Dan Shi

2013-07-25 15:08:48 -0700

[diff] [blame]

110

URL_HOST = CONFIG.get_config_value('SERVER', 'hostname', type=str)

111

URL_PATTERN = CONFIG.get_config_value('CROS', 'log_url_pattern', type=str)

112

Dan Shi

dc9eb17

2014-12-09 16:05:02 -0800

[diff] [blame]

113

# Some test could be missing from the test results for various reasons. Add

114

# such test in this list and explain the reason.

115

IGNORE_MISSING_TESTS = [

116

# For latest build, npo_test_delta does not exist.

117

'autoupdate_EndToEndTest.npo_test_delta.*',

118

# For trybot build, nmo_test_delta does not exist.

119

'autoupdate_EndToEndTest.nmo_test_delta.*',

120

# Older build does not have login_LoginSuccess test in push_to_prod suite.

121

# TODO(dshi): Remove following lines after R41 is stable.

122

'login_LoginSuccess']

123

Dan Shi

2013-07-25 15:08:48 -0700

[diff] [blame]

124

# Save all run_suite command output.

Shuqian Zhao

7b68219

2016-09-16 14:38:41 -0700

[diff] [blame]

125

manager = multiprocessing.Manager()

126

run_suite_output = manager.list()

Shuqian Zhao

1b4ca27

2016-09-18 14:58:19 -0700

[diff] [blame]

127

all_suite_ids = manager.list()

Shuqian Zhao

2016-10-25 13:31:06 -0700

[diff] [blame]

128

# A dict maps the name of the updated repos and the path of them.

129

UPDATED_REPOS = {'autotest': AUTOTEST_DIR,

130

'chromite': '%s/site-packages/chromite/' % AUTOTEST_DIR}

Shuqian Zhao

2016-11-11 16:37:36 -0800

[diff] [blame]

131

PUSH_USER = 'chromeos-test-lab'

Dan Shi

2013-07-25 15:08:48 -0700

[diff] [blame]

132

133

class TestPushException(Exception):
    """Error raised when the push-to-prod validation test fails."""

136

Dan Shi

2014-05-09 13:47:00 -0700

[diff] [blame]

137

Shuqian Zhao

2016-12-11 19:10:36 -0800

[diff] [blame]

138

@retry.retry(TestPushException, timeout_min=5, delay_sec=30)
def check_dut_inventory(required_num_duts):
    """Check that enough Ready, unlocked DUTs exist for each platform.

    NOTE(review): the retry decorator presumably re-runs this check while it
    raises TestPushException (timeout_min=5, delay_sec=30) -- confirm against
    autotest_lib.client.common_lib.cros.retry.

    @param required_num_duts: a dict specifying the number of DUT each platform
                              requires in order to finish push tests.
    @raise TestPushException: if number of DUTs are less than the requirement.
    """
    # Imported locally so the block does not rely on a new module-level import.
    from collections import Counter

    print('Checking DUT inventory...')
    hosts = AFE.run('get_hosts', status='Ready', locked=False)
    # Tally hosts per platform in a single pass over the host list.
    current_inventory = Counter(host['platform'] for host in hosts)

    shortages = []
    for platform, req_num in required_num_duts.items():
        curr_num = current_inventory.get(platform, 0)
        if curr_num < req_num:
            shortages.append('\nRequire %d %s DUTs, only %d are Ready now' %
                             (req_num, platform, curr_num))
    if shortages:
        raise TestPushException('Not enough DUTs to run push tests. %s' %
                                ''.join(shortages))

Shuqian Zhao

2016-09-12 10:42:03 -0700

[diff] [blame]

161

def powerwash_dut_to_test_repair(hostname, timeout):
    """Run platform_Powerwash on a DUT, then reverify it to exercise repair.

    @param hostname: hostname of the dut.
    @param timeout: seconds to wait for the powerwash test results before the
                    job is aborted.
    @raise TestPushException: if no test results show up before the timeout,
                              or result verification fails.
    """
    powerwash_test = models.Test.objects.get(name='platform_Powerwash')
    control_path = os.path.join(common.autotest_dir, powerwash_test.path)
    control_file = utils.read_file(control_path)
    job_id = rpc_utils.create_job_common(
            'powerwash',
            priority=priorities.Priority.SUPER,
            control_type='Server',
            control_file=control_file,
            hosts=[hostname])

    deadline = time.time() + timeout
    # Poll TKO until the job has test statuses or the deadline has passed.
    while True:
        if TKO.get_job_test_statuses_from_db(job_id):
            break
        if time.time() >= deadline:
            AFE.run('abort_host_queue_entries', job=job_id)
            raise TestPushException(
                    'Powerwash test on %s timeout after %ds, abort it.' %
                    (hostname, timeout))
        time.sleep(10)

    verify_test_results(job_id, EXPECTED_TEST_RESULTS_POWERWASH)
    # Kick off verify, verify will fail and a repair should be triggered.
    AFE.reverify_hosts(hostnames=[hostname])

Kevin Cheng

2015-12-11 09:45:57 -0800

[diff] [blame]

185

186

Shuqian Zhao

d2a99f0

2016-09-22 13:31:30 -0700

[diff] [blame]

187

def reverify_all_push_duts(pool):
    """Request a reverify of every host carrying the given pool label.

    @param pool: Name of the pool used by test_push.
    """
    print('Reverifying DUTs in pool %s' % pool)
    hosts_in_pool = AFE.get_hosts(label=constants.Labels.POOL_PREFIX + pool)
    hostnames = [dut.hostname for dut in hosts_in_pool]
    AFE.reverify_hosts(hostnames=hostnames)

196

197

Kevin Cheng

2016-12-15 12:17:13 -0800

[diff] [blame]

198

def get_default_build(board='gandof', server='chromeos-autotest.hot'):
    """Get the default build to be used for test.

    Runs `atest stable_version list` for the board. If the command fails or
    its output contains nothing matching BUILD_REGEX, the build defined in
    the CROS/stable_cros_version config value is used instead.

    @param board: Name of board to be tested, default is gandof.
    @param server: Web server passed to atest (-w) to get the stable version
                   from.
    @return: Build to be tested, e.g., gandof-release/R36-5881.0.0
    """
    # An argument list (no shell) keeps board/server from being interpreted
    # by a shell.
    cmd = ['%s/cli/atest' % AUTOTEST_DIR, 'stable_version', 'list',
           '--board=%s' % board, '-w', server]
    try:
        result = subprocess.check_output(cmd).strip()
    except (subprocess.CalledProcessError, OSError) as e:
        print('Failed to get stable version of %s from %s: %s' %
              (board, server, e))
        result = ''

    match = re.search(BUILD_REGEX, result)
    if match:
        return '%s-release/%s' % (board, match.group(0))

    # If fail to get stable version from cautotest, use that defined in config
    build = CONFIG.get_config_value('CROS', 'stable_cros_version')
    return '%s-release/%s' % (board, build)

215

Dan Shi

2013-07-25 15:08:48 -0700

[diff] [blame]

216

def parse_arguments():
    """Parse arguments for test_push tool.

    When --build or --shard_build is not given, it is filled in with the
    result of get_default_build for the corresponding board. The --num_duts
    string is converted to a Python object with ast.literal_eval.

    @return: Parsed arguments.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-b', '--board', dest='board', default='gandof',
                        help='Default is gandof.')
    parser.add_argument('-sb', '--shard_board', dest='shard_board',
                        default='quawks',
                        help='Default is quawks.')
    parser.add_argument('-i', '--build', dest='build', default=None,
                        help='Default is the latest stable build of given '
                             'board. Must be a stable build, otherwise AU test '
                             'will fail. (ex: gandof-release/R54-8743.25.0)')
    parser.add_argument('-si', '--shard_build', dest='shard_build',
                        default=None,
                        help='Default is the latest stable build of given '
                             'board. Must be a stable build, otherwise AU test '
                             'will fail.')
    parser.add_argument('-w', '--web', default='chromeos-autotest.hot',
                        help='Specify web server to grab stable version from.')
    parser.add_argument('-ab', '--android_board', dest='android_board',
                        default='shamu-2', help='Android board to test.')
    parser.add_argument('-ai', '--android_build', dest='android_build',
                        help='Android build to test.')
    parser.add_argument('-p', '--pool', dest='pool', default='bvt')
    parser.add_argument('-u', '--num', dest='num', type=int, default=3,
                        help='Run on at most NUM machines.')
    parser.add_argument('-e', '--email', dest='email', default=DEFAULT_EMAIL,
                        help='Email address for the notification to be sent to '
                             'after the script finished running.')
    parser.add_argument('-t', '--timeout_min', dest='timeout_min', type=int,
                        default=DEFAULT_TIMEOUT_MIN_FOR_SUITE_JOB,
                        help='Time in mins to wait before abort the jobs we '
                             'are waiting on. Only for the asynchronous suites '
                             'triggered by create_and_return flag.')
    parser.add_argument('-ud', '--num_duts', dest='num_duts',
                        default=DEFAULT_NUM_DUTS,
                        help="String of dict that indicates the required number"
                             " of DUTs for each board. E.g {'gandof':4}")
    parser.add_argument('-c', '--continue_on_failure', action='store_true',
                        dest='continue_on_failure',
                        help='All tests continue to run when there is failure')

    arguments = parser.parse_args(sys.argv[1:])

    # Get latest stable build as default build.
    if not arguments.build:
        arguments.build = get_default_build(arguments.board, arguments.web)
    if not arguments.shard_build:
        arguments.shard_build = get_default_build(arguments.shard_board,
                                                  arguments.web)

    # num_duts arrives as the string form of a dict; literal_eval parses it
    # without evaluating arbitrary expressions.
    arguments.num_duts = ast.literal_eval(arguments.num_duts)

    return arguments

Shuqian Zhao

2015-08-06 09:46:22 -0700

[diff] [blame]

275

def do_run_suite(suite_name, arguments, use_shard=False,
                 create_and_return=False, testbed_test=False):
    """Call run_suite to run a suite job, and return the suite job id.

    The script waits the suite job to finish before returning the suite job id.
    Also it will echo the run_suite output to stdout.

    @param suite_name: Name of a suite, e.g., dummy.
    @param arguments: Arguments for run_suite command.
    @param use_shard: If true, suite is scheduled for shard board.
    @param create_and_return: If True, run_suite just creates the suite, print
                              the job id, then finish immediately.
    @param testbed_test: True to run testbed test. Default is False.

    @return: Suite job ID.
    @raise TestPushException: if the suite job id cannot be found in the
                              run_suite output, or an asynchronous suite does
                              not finish within arguments.timeout_min minutes.
    """
    if use_shard and not testbed_test:
        board = arguments.shard_board
        build = arguments.shard_build
    elif testbed_test:
        board = arguments.android_board
        build = arguments.android_build
    else:
        board = arguments.board
        build = arguments.build

    # Remove cros-version label to force provision.
    hosts = AFE.get_hosts(label=constants.Labels.BOARD_PREFIX+board,
                          locked=False)
    for host in hosts:
        labels_to_remove = [
                l for l in host.labels
                if (l.startswith(provision.CROS_VERSION_PREFIX) or
                    l.startswith(provision.TESTBED_BUILD_VERSION_PREFIX))]
        if labels_to_remove:
            AFE.run('host_remove_labels', id=host.id, labels=labels_to_remove)

        # Test repair work flow on shards, powerwash test will timeout after 7m.
        if use_shard and not create_and_return:
            powerwash_dut_to_test_repair(host.hostname, timeout=420)

    current_dir = os.path.dirname(os.path.realpath(__file__))
    cmd = [os.path.join(current_dir, RUN_SUITE_COMMAND),
           '-s', suite_name,
           '-b', board,
           '-i', build,
           '-p', arguments.pool,
           '-u', str(arguments.num)]
    if create_and_return:
        cmd += ['-c']
    if testbed_test:
        cmd += ['--run_prod_code']

    suite_job_id = None

    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                            stderr=subprocess.STDOUT)

    while True:
        line = proc.stdout.readline()
        # readline() only returns an empty string at EOF. Stop there instead
        # of echoing and recording empty lines while run_suite is exiting.
        if not line:
            break
        stripped_line = line.rstrip()
        print(stripped_line)
        run_suite_output.append(stripped_line)

        if not suite_job_id:
            m = re.match(SUITE_JOB_START_INFO_REGEX, line)
            if m and m.group(1):
                suite_job_id = int(m.group(1))
                all_suite_ids.append(suite_job_id)

    # Make sure the run_suite process has completed before going on.
    proc.wait()

    if not suite_job_id:
        raise TestPushException('Failed to retrieve suite job ID.')

    # If create_and_return specified, wait for the suite to finish.
    if create_and_return:
        end = time.time() + arguments.timeout_min * 60
        while not AFE.get_jobs(id=suite_job_id, finished=True):
            if time.time() < end:
                time.sleep(10)
            else:
                AFE.run('abort_host_queue_entries', job=suite_job_id)
                raise TestPushException(
                        'Asynchronous suite triggered by create_and_return '
                        'flag has timed out after %d mins. Aborting it.' %
                        arguments.timeout_min)

    print('Suite job %s is completed.' % suite_job_id)
    return suite_job_id

Dan Shi

2014-05-09 15:18:15 -0700

[diff] [blame]

369

def check_dut_image(build, suite_job_id):
    """Verify that every DUT used by the suite reports the expected build.

    @param build: Expected build to be imaged.
    @param suite_job_id: job ID of the suite job.
    @raise TestPushException: If a DUT does not have expected build imaged.
    """
    print('Checking image installed in DUTs...')
    child_job_ids = [
            child.id
            for child in models.Job.objects.filter(parent_job_id=suite_job_id)]
    # Only the first host queue entry of each child job is looked at.
    first_entries = [
            models.HostQueueEntry.objects.filter(job_id=child_id)[0]
            for child_id in child_job_ids]
    duts = set(entry.host.hostname for entry in first_entries)
    for dut in duts:
        actual_build = site_utils.get_build_from_afe(dut, AFE)
        if actual_build == build:
            continue
        raise TestPushException('DUT is not imaged properly. Host %s has '
                                'build %s, while build %s is expected.' %
                                (dut, actual_build, build))

388

389

Shuqian Zhao

2015-08-06 09:46:22 -0700

[diff] [blame]

390

def test_suite(suite_name, expected_results, arguments, use_shard=False,
               create_and_return=False, testbed_test=False):
    """Run a suite through run_suite and check its outcome.

    @param suite_name: Name of a suite, e.g., dummy
    @param expected_results: A dictionary of test name to test result.
    @param arguments: Arguments for run_suite command.
    @param use_shard: If true, suite is scheduled for shard board.
    @param create_and_return: If True, run_suite just creates the suite, print
                              the job id, then finish immediately.
    @param testbed_test: True to run testbed test. Default is False.
    """
    suite_job_id = do_run_suite(suite_name, arguments, use_shard,
                                create_and_return, testbed_test)

    if testbed_test:
        build_expected = arguments.android_build
    else:
        build_expected = arguments.build

    # Confirm all DUTs used for the suite are imaged to expected build.
    # hqe.host_id for jobs running in shard is not synced back to master db,
    # therefore, skip verifying dut build for jobs running in shard.
    skip_image_check = suite_name == AU_SUITE or use_shard or testbed_test
    if not skip_image_check:
        check_dut_image(build_expected, suite_job_id)

    # Verify test results are the expected results.
    verify_test_results(suite_job_id, expected_results)

415

416

417

def verify_test_results(job_id, expected_results):
    """Compare the test results of a job against the expected results.

    Each key of expected_results is used as a regular expression searched in
    the test names. Mismatched statuses, tests that match no key, keys that
    matched no test (minus IGNORE_MISSING_TESTS) and a log link that fails to
    load are all reported together.

    @param job_id: id of the running jobs. For suite job, it is suite_job_id.
    @param expected_results: A dictionary of test name to test result.
    @raise TestPushException: If verify fails.
    """
    print('Comparing test results...')
    test_views = site_utils.get_test_views_from_tko(job_id, TKO)

    mismatches = []
    unexpected_tests = []
    matched_patterns = set()

    for test_name, test_status in test_views.items():
        print('%s%s' % (test_name.ljust(30), test_status))
        # platform_InstallTestImage test may exist in old builds.
        if re.search('platform_InstallTestImage_SERVER_JOB$', test_name):
            continue

        hits = [pattern for pattern in expected_results
                if re.search(pattern, test_name)]
        if not hits:
            unexpected_tests.append(test_name)
            continue

        matched_patterns.update(hits)
        for pattern in hits:
            wanted_status = expected_results[pattern]
            if wanted_status != test_status:
                mismatches.append('%s Expected: [%s], Actual: [%s]' %
                                  (test_name, wanted_status, test_status))

    # Expected patterns that matched nothing, except the ones allowed to be
    # absent.
    missing_tests = set(expected_results.keys()) - matched_patterns
    missing_tests.difference_update(IGNORE_MISSING_TESTS)

    sections = (
            ('Results of %d test(s) do not match expected values:',
             mismatches),
            ('%d test(s) are not expected to be run:', unexpected_tests),
            ('%d test(s) are missing from the results:', missing_tests),
    )
    summary = []
    for header, entries in sections:
        if entries:
            summary.append(header % len(entries))
            summary.extend(entries)
            summary.append('\n')

    # Test link to log can be loaded.
    job_name = '%s-%s' % (job_id, getpass.getuser())
    log_link = URL_PATTERN % (URL_HOST, job_name)
    try:
        urllib2.urlopen(log_link).read()
    except urllib2.URLError:
        summary.append('Failed to load page for link to log: %s.' % log_link)

    if summary:
        raise TestPushException('\n'.join(summary))

484

485

Dan Shi

2015-04-07 17:37:09 -0700

[diff] [blame]

486

def test_suite_wrapper(queue, suite_name, expected_results, arguments,
                       use_shard=False, create_and_return=False,
                       testbed_test=False):
    """Call test_suite and hand any exception over to the parent process.

    @param queue: Queue to save exception to be accessed by parent process.
    @param suite_name: Name of a suite, e.g., dummy
    @param expected_results: A dictionary of test name to test result.
    @param arguments: Arguments for run_suite command.
    @param use_shard: If true, suite is scheduled for shard board.
    @param create_and_return: If True, run_suite just creates the suite, print
                              the job id, then finish immediately.
    @param testbed_test: True to run testbed test. Default is False.
    """
    try:
        test_suite(suite_name, expected_results, arguments,
                   use_shard=use_shard,
                   create_and_return=create_and_return,
                   testbed_test=testbed_test)
    except:
        # Store the whole exc_info leads to a PicklingError, so the traceback
        # object is replaced by its extracted entries before queueing.
        failure = sys.exc_info()
        stack_entries = traceback.extract_tb(failure[2])
        queue.put((failure[0], failure[1], stack_entries))

508

509

Dan Shi

2015-04-07 17:37:09 -0700

[diff] [blame]

510

def check_queue(queue):
    """Check the queue for any exception being raised.

    Each queue item is expected to be a tuple of (exception type, exception
    value, extracted traceback entries). Returns without doing anything when
    the queue is empty.

    @param queue: Queue used to store exception for parent process to access.
    @raise: Any exception found in the queue.
    """
    if queue.empty():
        return
    except_type, except_value, extracted_tb = queue.get()
    # The traceback object itself is not available here, so print the
    # extracted entries in the usual traceback layout before raising.
    print('Original stack trace of the exception:\n%s' %
          ''.join(traceback.format_list(extracted_tb)))
    raise except_type(except_value)

522

523

Shuqian Zhao

2016-10-25 13:31:06 -0700

[diff] [blame]

524

def get_head_of_repos(repos):
    """Look up the commit currently checked out in each given repo.

    Runs 'git rev-parse HEAD' inside every repo directory.

    @param repos: a map of repo name to the path of the repo. E.g.
                  {'autotest': '/usr/local/autotest'}
    @return: a map of repo names to the current HEAD of that repo.
    """
    @contextmanager
    def working_directory(target_dir):
        """Temporarily switch the working directory, restoring it on exit.

        @param target_dir: directory to switch to; a leading '~' is expanded.
        """
        original_dir = os.getcwd()
        os.chdir(os.path.expanduser(target_dir))
        try:
            yield
        finally:
            os.chdir(original_dir)

    def read_head(repo_path):
        """Return the stripped output of 'git rev-parse HEAD' in repo_path."""
        with working_directory(repo_path):
            raw_output = subprocess.check_output('git rev-parse HEAD',
                                                 shell=True)
        return raw_output.strip()

    return dict((name, read_head(repo_path))
                for name, repo_path in repos.iteritems())

551

552

Shuqian Zhao

2016-11-11 16:37:36 -0800

[diff] [blame]

553

def push_prod_next_branch(updated_repo_heads):
    """Rebase each repo's prod-next branch onto its tested HEAD and push it.

    Meant to run after all tests pass. The push command must be run as
    PUSH_USER, since only PUSH_USER has the right to push branches.

    @param updated_repo_heads: a map of repo names to tested HEAD of that repo.
    """
    # prod-next branch for every repo is downloaded under PUSH_USER home dir.
    git_steps = ('cd ~/{repo}; git pull; git rebase {hash} prod-next;'
                 'git push origin prod-next')
    # {repo} and {hash} are left as placeholders and filled in per repo below.
    command_template = "sudo su - %s -c '%s'" % (PUSH_USER, git_steps)

    for repo_name, test_hash in updated_repo_heads.iteritems():
        repo_command = command_template.format(hash=test_hash, repo=repo_name)
        print('Pushing %s prod-next branch to %s' % (repo_name, test_hash))
        # stderr is folded into the captured output so it is printed as well.
        push_output = subprocess.check_output(
                repo_command, stderr=subprocess.STDOUT, shell=True)
        print(push_output)

Dan Shi

2013-07-25 15:08:48 -0700

[diff] [blame]

573

def main():
    """Entry point for test_push script.

    Records the HEAD of every repo in UPDATED_REPOS, runs the push-to-prod,
    AU, dummy (create_and_return) and testbed suites in separate processes,
    and pushes the prod-next branches when none of them reports an exception.
    On failure the error is printed, optionally emailed, and re-raised.
    """
    arguments = parse_arguments()
    updated_repo_heads = get_head_of_repos(UPDATED_REPOS)
    head_lines = ['%s: %s' % (k, v) for k, v in updated_repo_heads.iteritems()]
    updated_repo_msg = '\n'.join(head_lines)

    try:
        # Daemon child processes are killed when the parent process fails.
        use_daemon = not arguments.continue_on_failure
        # Verify all the DUTs at the beginning of testing push.
        reverify_all_push_duts(arguments.pool)
        time.sleep(15) # Wait 15 secs for the verify test to start.
        check_dut_inventory(arguments.num_duts)
        queue = multiprocessing.Queue()

        # One row per suite, in start order:
        # (suite name, expected results, extra args for the wrapper, daemon).
        suite_specs = (
            (PUSH_TO_PROD_SUITE, EXPECTED_TEST_RESULTS, (), use_daemon),
            # TODO(dshi): Remove following line after crbug.com/267644 is
            # fixed. Also, merge EXPECTED_TEST_RESULTS_AU to
            # EXPECTED_TEST_RESULTS
            # AU suite will be on shard until crbug.com/634049 is fixed.
            (AU_SUITE, EXPECTED_TEST_RESULTS_AU, (True,), use_daemon),
            # suite test with --create_and_return flag
            (DUMMY_SUITE, EXPECTED_TEST_RESULTS_DUMMY, (False, True), True),
            # Test suite for testbed
            (TESTBED_SUITE, EXPECTED_TEST_RESULTS_TESTBED,
             (False, False, True), use_daemon),
        )

        suite_processes = []
        for suite, expected, extra_args, daemon in suite_specs:
            process = multiprocessing.Process(
                    target=test_suite_wrapper,
                    args=(queue, suite, expected, arguments) + extra_args)
            process.daemon = daemon
            process.start()
            suite_processes.append(process)

        # Poll for child failures for as long as any suite is still running.
        while any(proc.is_alive() for proc in suite_processes):
            check_queue(queue)
            time.sleep(5)

        # Pick up a failure reported just before the last suite exited.
        check_queue(queue)

        for process in suite_processes:
            process.join()

        # All tests pass, push prod-next branch for UPDATED_REPOS.
        push_prod_next_branch(updated_repo_heads)
    except Exception as e:
        print('Test for pushing to prod failed:\n')
        print(str(e))
        # Abort running jobs when choose not to continue when there is failure.
        if not arguments.continue_on_failure:
            for suite_id in all_suite_ids:
                if AFE.get_jobs(id=suite_id, finished=False):
                    AFE.run('abort_host_queue_entries', job=suite_id)
        # Send out email about the test failure.
        if arguments.email:
            failure_summary = (
                    'Test CLs of the following repos failed. Below are the '
                    'repos and the corresponding test HEAD.\n\n%s\n\n.'
                    'Error occurred during test:\n\n%s\n\n' %
                    (updated_repo_msg, str(e)))
            gmail_lib.send_email(
                    arguments.email,
                    'Test for pushing to prod failed. Do NOT push!',
                    failure_summary + '\n'.join(run_suite_output))
        raise
    finally:
        # Reverify all the hosts
        reverify_all_push_duts(arguments.pool)

    message = ('\nAll tests are completed successfully, the prod branch of the '
               'following repos ready to be pushed to the hash list below.\n'
               '%s\n\n\nInstructions for pushing to prod are available at '
               'https://goto.google.com/autotest-to-prod' % updated_repo_msg)
    print(message)
    # Send out email about test completed successfully.
    if arguments.email:
        gmail_lib.send_email(
                arguments.email,
                'Test for pushing to prod completed successfully',
                message)

Dan Shi