blob: adcdbc6b716d2a544ec41748d1d1472bd3c94277 [file] [log] [blame]
Dan Shi7e04fa82013-07-25 15:08:48 -07001#!/usr/bin/python
2#
3# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
4# Use of this source code is governed by a BSD-style license that can be
5# found in the LICENSE file.
6
"""Tool to validate code in prod branch before pushing to lab.

The script runs the push_to_prod suite to verify that the code in the prod
branch is ready to be pushed. Link to design document:
https://docs.google.com/a/google.com/document/d/1JMz0xS3fZRSHMpFkkKAL_rxsdbNZomhHbC3B8L71uuI/edit

To verify whether the prod branch can be pushed to the lab, run the following
command on the chromeos-staging-master2.hot server:
/usr/local/autotest/site_utils/test_push.py -e someone@company.com

The script uses the latest gandof stable build as the test build by default.

"""
20
21import argparse
Shuqian Zhao1f311c02016-09-01 19:30:54 -070022import ast
Shuqian Zhao7b2daea2016-10-25 13:31:06 -070023from contextlib import contextmanager
Dan Shi7e04fa82013-07-25 15:08:48 -070024import getpass
Dan Shief1a5c02015-04-07 17:37:09 -070025import multiprocessing
Dan Shi7e04fa82013-07-25 15:08:48 -070026import os
27import re
28import subprocess
29import sys
Dan Shief1a5c02015-04-07 17:37:09 -070030import time
31import traceback
Dan Shi7e04fa82013-07-25 15:08:48 -070032import urllib2
33
34import common
Dan Shia8da7602014-05-09 15:18:15 -070035try:
36 from autotest_lib.frontend import setup_django_environment
37 from autotest_lib.frontend.afe import models
Shuqian Zhao327b6952016-09-12 10:42:03 -070038 from autotest_lib.frontend.afe import rpc_utils
Dan Shia8da7602014-05-09 15:18:15 -070039except ImportError:
40 # Unittest may not have Django database configured and will fail to import.
41 pass
Dan Shi5fa602c2015-03-26 17:54:13 -070042from autotest_lib.client.common_lib import global_config
Shuqian Zhao327b6952016-09-12 10:42:03 -070043from autotest_lib.client.common_lib import priorities
Shuqian Zhaof239b312017-12-05 16:45:02 -080044from autotest_lib.client.common_lib.cros import retry
Xixuan Wu93e646c2017-12-07 18:36:10 -080045from autotest_lib.server import constants
Dan Shi7e04fa82013-07-25 15:08:48 -070046from autotest_lib.server import site_utils
Shuqian Zhao327b6952016-09-12 10:42:03 -070047from autotest_lib.server import utils
Dan Shi47d32882014-12-22 16:25:05 -080048from autotest_lib.server.cros import provision
Dan Shi7e04fa82013-07-25 15:08:48 -070049from autotest_lib.server.cros.dynamic_suite import frontend_wrappers
Dan Shi7e04fa82013-07-25 15:08:48 -070050
Shuqian Zhao56969542017-05-30 12:56:57 -070051try:
52 from chromite.lib import metrics
53 from chromite.lib import ts_mon_config
54except ImportError:
55 metrics = site_utils.metrics_mock
56 ts_mon_config = site_utils.metrics_mock
57
AUTOTEST_DIR = common.autotest_dir
CONFIG = global_config.global_config

# Retrying RPC clients shared by every function in this module.
AFE = frontend_wrappers.RetryingAFE(timeout_min=0.5, delay_sec=2)
TKO = frontend_wrappers.RetryingTKO(timeout_min=0.1, delay_sec=10)

MAIL_FROM = 'chromeos-test@google.com'
# Matches a build version such as R36-5881.0.0. Written as a raw string so
# the backslashes reach the regex engine without relying on Python keeping
# unknown string escapes intact.
BUILD_REGEX = r'R[\d]+-[\d]+\.[\d]+\.[\d]+'
RUN_SUITE_COMMAND = 'run_suite.py'
PUSH_TO_PROD_SUITE = 'push_to_prod'
DUMMY_SUITE = 'dummy'
TESTBED_SUITE = 'testbed_push'
# TODO(shuqianz): Dynamically get android build after crbug.com/646068 fixed
DEFAULT_TIMEOUT_MIN_FOR_SUITE_JOB = 30
IMAGE_BUCKET = CONFIG.get_config_value('CROS', 'image_storage_server')
# TODO(crbug.com/767302): Bump up testbed requirement back to 1 when we
# re-enable testbed tests.
# Pairs of (board, required number of DUTs); parse_arguments turns this into
# the default value of --num_duts.
DEFAULT_NUM_DUTS = (
    ('gandof', 4),
    ('quawks', 2),
    ('testbed', 0),
)

# Matches the run_suite output line announcing the suite job; group 1 is the
# suite job id.
SUITE_JOB_START_INFO_REGEX = (r'^.*Created suite job:.*'
                              r'tab_id=view_job&object_id=(\d+)$')
Dan Shi7e04fa82013-07-25 15:08:48 -070083
# Expected outcome of each suite. Every dictionary maps a regular expression
# that is searched for in the test name to the status that test must report.

# Expectations for the push_to_prod suite.
EXPECTED_TEST_RESULTS = {
    '^SERVER_JOB$': 'GOOD',
    # This is related to dummy_Fail/control.dependency.
    'dummy_Fail.dependency$': 'TEST_NA',
    'login_LoginSuccess.*': 'GOOD',
    'provision_AutoUpdate.double': 'GOOD',
    'dummy_Pass.*': 'GOOD',
    'dummy_Fail.Fail$': 'FAIL',
    'dummy_Fail.RetryFail$': 'FAIL',
    'dummy_Fail.RetrySuccess': 'GOOD',
    'dummy_Fail.Error$': 'ERROR',
    'dummy_Fail.Warn$': 'WARN',
    'dummy_Fail.NAError$': 'TEST_NA',
    'dummy_Fail.Crash$': 'GOOD',
    'autotest_SyncCount$': 'GOOD',
}

# Expectations for the dummy suite.
EXPECTED_TEST_RESULTS_DUMMY = {
    '^SERVER_JOB$': 'GOOD',
    'dummy_Pass.*': 'GOOD',
    'dummy_Fail.Fail': 'FAIL',
    'dummy_Fail.Warn': 'WARN',
    'dummy_Fail.Crash': 'GOOD',
    'dummy_Fail.Error': 'ERROR',
    'dummy_Fail.NAError': 'TEST_NA',
}

# Expectations for the testbed suite.
EXPECTED_TEST_RESULTS_TESTBED = {
    '^SERVER_JOB$': 'GOOD',
    'testbed_DummyTest': 'GOOD',
}

# Expectations for the standalone powerwash job.
EXPECTED_TEST_RESULTS_POWERWASH = {
    'platform_Powerwash': 'GOOD',
    'SERVER_JOB': 'GOOD',
}
114
# Server host name and log URL template read from the global config;
# verify_test_results combines them into the link to a job's logs.
URL_HOST = CONFIG.get_config_value('SERVER', 'hostname', type=str)
URL_PATTERN = CONFIG.get_config_value('CROS', 'log_url_pattern', type=str)

# Tests that may legitimately be absent from the test results. Every entry
# must come with an explanation of why it can be missing.
IGNORE_MISSING_TESTS = [
    # For latest build, npo_test_delta does not exist.
    'autoupdate_EndToEndTest.npo_test_delta.*',
    # For trybot build, nmo_test_delta does not exist.
    'autoupdate_EndToEndTest.nmo_test_delta.*',
    # Older build does not have login_LoginSuccess test in push_to_prod suite.
    # TODO(dshi): Remove following lines after R41 is stable.
    'login_LoginSuccess',
]

# Collected run_suite output lines and suite job ids, meant to be shared
# between the background suite-running processes and the main process. These
# start as plain lists; _main creates multiprocessing.Manager lists under the
# same names.
_run_suite_output = []
_all_suite_ids = []

# Maps the name of each updated repo to the path of its checkout.
UPDATED_REPOS = {
    'autotest': AUTOTEST_DIR,
    'chromite': '%s/site-packages/chromite/' % AUTOTEST_DIR,
}
# Account used by push_prod_next_branch to run the git push commands.
PUSH_USER = 'chromeos-test-lab'
Dan Shi7e04fa82013-07-25 15:08:48 -0700139
class TestPushException(Exception):
    """Raised when the test for pushing to prod fails.

    The checks in this module (DUT inventory, suite timeouts, result
    comparison, DUT image verification) all report failure through it.
    """
143
@retry.retry(TestPushException, timeout_min=5, delay_sec=30)
def check_dut_inventory(required_num_duts, pool):
    """Verify the pool holds enough Ready DUTs of every required platform.

    Only unlocked hosts in Ready status that carry the pool label are
    counted. The function is wrapped by retry.retry for TestPushException
    with timeout_min=5 and delay_sec=30.

    @param required_num_duts: a dict mapping each platform to the number of
                              DUTs it requires in order to finish push tests.
    @param pool: the pool used by test_push.
    @raise TestPushException: if any platform has fewer DUTs than required.
    """
    print('Checking DUT inventory...')
    pool_label = constants.Labels.POOL_PREFIX + pool
    ready_hosts = AFE.run('get_hosts', status='Ready', locked=False)

    # Count the Ready hosts of the pool per platform.
    current_inventory = {}
    for host in ready_hosts:
        if pool_label not in host.get('labels', []):
            continue
        platform = host['platform']
        current_inventory[platform] = current_inventory.get(platform, 0) + 1

    # Collect one message per platform that falls short of its requirement.
    shortages = []
    for platform, req_num in required_num_duts.items():
        curr_num = current_inventory.get(platform, 0)
        if curr_num < req_num:
            shortages.append(
                    '\nRequire %d %s DUTs in pool: %s, only %d are Ready'
                    ' now' % (req_num, platform, pool, curr_num))

    if shortages:
        raise TestPushException('Not enough DUTs to run push tests. %s' %
                                ''.join(shortages))
168
Dan Shi5ba5d2e2014-05-09 13:47:00 -0700169
def powerwash_dut_to_test_repair(hostname, timeout):
    """Run platform_Powerwash on a DUT to exercise the repair workflow.

    A 'powerwash' server job is created for the host, polled every 10 seconds
    until TKO reports test statuses for it, and its results are compared with
    EXPECTED_TEST_RESULTS_POWERWASH. Finally a reverify of the host is
    requested.

    @param hostname: hostname of the dut.
    @param timeout: seconds to wait for the powerwash test before aborting it.
    @raise TestPushException: if the test times out or its results are not the
                              expected ones.
    """
    powerwash_test = models.Test.objects.get(name='platform_Powerwash')
    control_file = utils.read_file(
            os.path.join(common.autotest_dir, powerwash_test.path))
    job_id = rpc_utils.create_job_common(
            'powerwash', priority=priorities.Priority.SUPER,
            control_type='Server', control_file=control_file,
            hosts=[hostname])

    deadline = time.time() + timeout
    while True:
        if TKO.get_job_test_statuses_from_db(job_id):
            break
        if time.time() >= deadline:
            AFE.run('abort_host_queue_entries', job=job_id)
            raise TestPushException(
                    'Powerwash test on %s timeout after %ds, abort it.' %
                    (hostname, timeout))
        time.sleep(10)

    verify_test_results(job_id, EXPECTED_TEST_RESULTS_POWERWASH)
    # Kick off verify, verify will fail and a repair should be triggered.
    AFE.reverify_hosts(hostnames=[hostname])
Kevin Cheng6e4c2642015-12-11 09:45:57 -0800194
195
def reverify_all_push_duts():
    """Request a reverify of every host returned by the AFE."""
    print('Reverifying all DUTs.')
    hostnames = []
    for host in AFE.get_hosts():
        hostnames.append(host.hostname)
    AFE.reverify_hosts(hostnames=hostnames)
201
202
def get_default_build(board='gandof', server='chromeos-staging-master2.hot'):
    """Get the default build to be used for test.

    The stable version is read from the output of `atest stable_version list`.
    If that output contains no build version, the stable_cros_version value
    from the global config is used instead.

    @param board: Name of board to be tested, default is gandof.
    @param server: Web server passed to atest with -w, from which the stable
                   version is read.
    @return: Build to be tested, e.g., gandof-release/R36-5881.0.0
    @raise subprocess.CalledProcessError: if the atest command exits with a
                                          non-zero status.
    """
    # Run atest with an argument list rather than a shell string, so that the
    # board and server values are passed through verbatim and never
    # interpreted by a shell.
    cmd = [os.path.join(AUTOTEST_DIR, 'cli', 'atest'),
           'stable_version', 'list',
           '--board=%s' % board,
           '-w', server]
    result = subprocess.check_output(cmd).strip()
    match = re.search(BUILD_REGEX, result)
    if match:
        return '%s-release/%s' % (board, match.group(0))

    # If fail to get stable version from cautotest, use that defined in config
    build = CONFIG.get_config_value('CROS', 'stable_cros_version')
    return '%s-release/%s' % (board, build)
220
def parse_arguments():
    """Parse arguments for test_push tool.

    When --build or --shard_build is not given, the value is filled in with
    the build returned by get_default_build for the corresponding board and
    the --web server.

    @return: Parsed arguments.

    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-b', '--board', dest='board', default='gandof',
                        help='Default is gandof.')
    parser.add_argument('-sb', '--shard_board', dest='shard_board',
                        default='quawks',
                        help='Default is quawks.')
    parser.add_argument('-i', '--build', dest='build', default=None,
                        help='Default is the latest stable build of given '
                             'board. Must be a stable build, otherwise AU test '
                             'will fail. (ex: gandof-release/R54-8743.25.0)')
    parser.add_argument('-si', '--shard_build', dest='shard_build', default=None,
                        help='Default is the latest stable build of given '
                             'board. Must be a stable build, otherwise AU test '
                             'will fail.')
    parser.add_argument('-w', '--web', default='chromeos-staging-master2.hot',
                        help='Specify web server to grab stable version from.')
    parser.add_argument('-ab', '--android_board', dest='android_board',
                        default='shamu-2', help='Android board to test.')
    parser.add_argument('-ai', '--android_build', dest='android_build',
                        help='Android build to test.')
    parser.add_argument('-p', '--pool', dest='pool', default='bvt')
    parser.add_argument('-t', '--timeout_min', dest='timeout_min', type=int,
                        default=DEFAULT_TIMEOUT_MIN_FOR_SUITE_JOB,
                        help='Time in mins to wait before abort the jobs we '
                             'are waiting on. Only for the asynchronous suites '
                             'triggered by create_and_return flag.')
    # The value is parsed with ast.literal_eval, so it must be a Python
    # literal; the default is built from DEFAULT_NUM_DUTS.
    parser.add_argument('-ud', '--num_duts', dest='num_duts',
                        default=dict(DEFAULT_NUM_DUTS),
                        type=ast.literal_eval,
                        help="Python dict literal that specifies the required"
                        " number of DUTs for each board. E.g {'gandof':4}")
    parser.add_argument('-c', '--continue_on_failure', action='store_true',
                        dest='continue_on_failure',
                        help='All tests continue to run when there is failure')

    arguments = parser.parse_args(sys.argv[1:])

    # Get latest stable build as default build.
    if not arguments.build:
        arguments.build = get_default_build(arguments.board, arguments.web)
    if not arguments.shard_build:
        arguments.shard_build = get_default_build(arguments.shard_board,
                                                  arguments.web)

    return arguments
272
273
def do_run_suite(suite_name, arguments, use_shard=False,
                 create_and_return=False, testbed_test=False):
    """Call run_suite to run a suite job, and return the suite job id.

    The script waits the suite job to finish before returning the suite job id.
    Also it will echo the run_suite output to stdout. Every output line is
    appended to _run_suite_output and the suite job id, once found, to
    _all_suite_ids.

    @param suite_name: Name of a suite, e.g., dummy.
    @param arguments: Arguments for run_suite command.
    @param use_shard: If true, suite is scheduled for shard board.
    @param create_and_return: If True, run_suite just creates the suite, print
                              the job id, then finish immediately.
    @param testbed_test: True to run testbed test. Default is False.

    @return: Suite job ID.
    @raise TestPushException: if no host is available for the repair test, if
                              the suite job ID cannot be found in the
                              run_suite output, or if an asynchronous suite
                              times out.

    """
    if use_shard and not testbed_test:
        board = arguments.shard_board
        build = arguments.shard_build
    elif testbed_test:
        board = arguments.android_board
        build = arguments.android_build
    else:
        board = arguments.board
        build = arguments.build

    # Remove cros-version label to force provision.
    hosts = AFE.get_hosts(label=constants.Labels.BOARD_PREFIX+board,
                          locked=False)
    # Remember the last host seen; it is the one used for the repair test.
    last_host = None
    for host in hosts:
        last_host = host
        labels_to_remove = [
                l for l in host.labels
                if (l.startswith(provision.CROS_VERSION_PREFIX) or
                    l.startswith(provision.TESTBED_BUILD_VERSION_PREFIX))]
        if labels_to_remove:
            AFE.run('host_remove_labels', id=host.id, labels=labels_to_remove)

    # Test repair work flow on shards, powerwash test will timeout after 7m.
    if use_shard and not create_and_return:
        # Without any host there is nothing to powerwash; fail with a clear
        # error instead of a NameError on the unbound loop variable.
        if last_host is None:
            raise TestPushException(
                    'No unlocked host found for board %s, can not test the '
                    'repair workflow.' % board)
        powerwash_dut_to_test_repair(last_host.hostname, timeout=420)

    current_dir = os.path.dirname(os.path.realpath(__file__))
    cmd = [os.path.join(current_dir, RUN_SUITE_COMMAND),
           '-s', suite_name,
           '-b', board,
           '-i', build,
           '-p', arguments.pool,
           '--minimum_duts', str(arguments.num_duts[board])]
    if create_and_return:
        cmd += ['-c']
    if testbed_test:
        cmd += ['--run_prod_code']

    suite_job_id = None

    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                            stderr=subprocess.STDOUT)

    while True:
        line = proc.stdout.readline()

        # Break when run_suite process completed.
        if not line and proc.poll() is not None:
            break
        print(line.rstrip())
        _run_suite_output.append(line.rstrip())

        # Only the first line announcing the suite job is parsed.
        if not suite_job_id:
            m = re.match(SUITE_JOB_START_INFO_REGEX, line)
            if m and m.group(1):
                suite_job_id = int(m.group(1))
                _all_suite_ids.append(suite_job_id)

    if not suite_job_id:
        raise TestPushException('Failed to retrieve suite job ID.')

    # If create_and_return specified, wait for the suite to finish.
    if create_and_return:
        end = time.time() + arguments.timeout_min * 60
        while not AFE.get_jobs(id=suite_job_id, finished=True):
            if time.time() < end:
                time.sleep(10)
            else:
                AFE.run('abort_host_queue_entries', job=suite_job_id)
                raise TestPushException(
                        'Asynchronous suite triggered by create_and_return '
                        'flag has timed out after %d mins. Aborting it.' %
                        arguments.timeout_min)

    print('Suite job %s is completed.' % suite_job_id)
    return suite_job_id
366
367
def check_dut_image(build, suite_job_id):
    """Check that every DUT used by the suite reports the expected build.

    The hosts are taken from the first host queue entry of each child job of
    the suite job.

    @param build: Expected build to be imaged.
    @param suite_job_id: job ID of the suite job.
    @raise TestPushException: If a DUT does not have expected build imaged.
    """
    print('Checking image installed in DUTs...')
    child_job_ids = []
    for job in models.Job.objects.filter(parent_job_id=suite_job_id):
        child_job_ids.append(job.id)

    first_hqes = []
    for child_job_id in child_job_ids:
        entries = models.HostQueueEntry.objects.filter(job_id=child_job_id)
        first_hqes.append(entries[0])

    # Several jobs may have run on the same DUT; check each DUT only once.
    hostnames = set()
    for hqe in first_hqes:
        hostnames.add(hqe.host.hostname)

    for hostname in hostnames:
        found_build = site_utils.get_build_from_afe(hostname, AFE)
        if found_build == build:
            continue
        raise TestPushException('DUT is not imaged properly. Host %s has '
                                'build %s, while build %s is expected.' %
                                (hostname, found_build, build))
Dan Shia8da7602014-05-09 15:18:15 -0700387
388
def test_suite(suite_name, expected_results, arguments, use_shard=False,
               create_and_return=False, testbed_test=False):
    """Run a suite through run_suite, then check DUT images and test results.

    @param suite_name: Name of a suite, e.g., dummy
    @param expected_results: A dictionary of test name to test result.
    @param arguments: Arguments for run_suite command.
    @param use_shard: If true, suite is scheduled for shard board.
    @param create_and_return: If True, run_suite just creates the suite, print
                              the job id, then finish immediately.
    @param testbed_test: True to run testbed test. Default is False.
    """
    suite_job_id = do_run_suite(suite_name, arguments, use_shard,
                                create_and_return, testbed_test)

    if testbed_test:
        build_expected = arguments.android_build
    else:
        build_expected = arguments.build

    # Confirm all DUTs used for the suite are imaged to expected build.
    # hqe.host_id for jobs running in shard is not synced back to master db,
    # therefore, skip verifying dut build for jobs running in shard. The check
    # is skipped for testbed tests as well.
    if not (use_shard or testbed_test):
        check_dut_image(build_expected, suite_job_id)

    # Verify test results are the expected results.
    verify_test_results(suite_job_id, expected_results)
414
415
def verify_test_results(job_id, expected_results):
    """Compare the test results of a job with the expected results.

    Three kinds of problems are reported: tests whose status differs from the
    expected one, tests that match no expected entry, and expected entries
    that matched no test (unless listed in IGNORE_MISSING_TESTS). The link to
    the job's logs must also load.

    @param job_id: id of the running jobs. For suite job, it is suite_job_id.
    @param expected_results: A dictionary of test name to test result. The
                             keys are regular expressions searched for in the
                             test names.
    @raise TestPushException: If verify fails.
    """
    print('Comparing test results...')
    test_views = site_utils.get_test_views_from_tko(job_id, TKO)

    mismatch_errors = []
    extra_test_errors = []
    found_keys = set()

    for test_name, test_status in test_views.items():
        print("%s%s" % (test_name.ljust(30), test_status))
        # platform_InstallTestImage test may exist in old builds.
        if re.search('platform_InstallTestImage_SERVER_JOB$', test_name):
            continue

        matches = [(pattern, expected)
                   for pattern, expected in expected_results.items()
                   if re.search(pattern, test_name)]
        if not matches:
            extra_test_errors.append(test_name)
            continue
        for pattern, expected in matches:
            found_keys.add(pattern)
            if expected != test_status:
                mismatch_errors.append(
                        '%s Expected: [%s], Actual: [%s]' %
                        (test_name, expected, test_status))

    # Expected entries that no test matched, minus the ones allowed to miss.
    missing_test_errors = set(expected_results.keys()) - found_keys
    for ignored in IGNORE_MISSING_TESTS:
        missing_test_errors.discard(ignored)

    # Each non-empty category contributes a header, its entries and a
    # separator to the summary.
    sections = (
        (('Results of %d test(s) do not match expected '
          'values:'), mismatch_errors),
        ('%d test(s) are not expected to be run:', extra_test_errors),
        ('%d test(s) are missing from the results:', missing_test_errors),
    )
    summary = []
    for header, entries in sections:
        if entries:
            summary.append(header % len(entries))
            summary.extend(entries)
            summary.append('\n')

    # Test link to log can be loaded.
    job_name = '%s-%s' % (job_id, getpass.getuser())
    log_link = URL_PATTERN % (URL_HOST, job_name)
    try:
        urllib2.urlopen(log_link).read()
    except urllib2.URLError:
        summary.append('Failed to load page for link to log: %s.' % log_link)

    if summary:
        raise TestPushException('\n'.join(summary))
483
484
def test_suite_wrapper(queue, suite_name, expected_results, arguments,
                       use_shard=False, create_and_return=False,
                       testbed_test=False):
    """Run test_suite and hand any exception over to the parent process.

    Nothing is put on the queue when test_suite succeeds.

    @param queue: Queue to save exception to be accessed by parent process.
    @param suite_name: Name of a suite, e.g., dummy
    @param expected_results: A dictionary of test name to test result.
    @param arguments: Arguments for run_suite command.
    @param use_shard: If true, suite is scheduled for shard board.
    @param create_and_return: If True, run_suite just creates the suite, print
                              the job id, then finish immediately.
    @param testbed_test: True to run testbed test. Default is False.
    """
    try:
        test_suite(suite_name, expected_results, arguments, use_shard,
                   create_and_return, testbed_test)
    except Exception:
        error_type, error_value, error_tb = sys.exc_info()
        # Store the whole exc_info leads to a PicklingError, so only the
        # extracted traceback entries are sent along.
        extracted_tb = traceback.extract_tb(error_tb)
        queue.put((error_type, error_value, extracted_tb))
507
508
def check_queue(queue):
    """Check the queue for any exception being raised.

    Each queue item is expected to be a tuple of (exception type, exception
    value, extracted traceback entries), as stored by test_suite_wrapper. At
    most one item is consumed per call.

    @param queue: Queue used to store exception for parent process to access.
    @raise: Any exception found in the queue.
    """
    if queue.empty():
        return
    except_type, except_value, extracted_tb = queue.get()
    # Print the original backtrace in the usual traceback layout instead of
    # the raw list of extracted entries.
    print('Original stack trace of the exception:\n%s' %
          ''.join(traceback.format_list(extracted_tb)))
    # Re-raise the original exception object when possible: building a new
    # one from a single argument fails for exception classes whose
    # constructor requires more arguments.
    if isinstance(except_value, except_type):
        raise except_value
    raise except_type(except_value)
521
522
def get_head_of_repos(repos):
    """Collect the current git HEAD of every given repo.

    @param repos: a map of repo name to the path of the repo. E.g.
                  {'autotest': '/usr/local/autotest'}
    @return: a map of repo names to the current HEAD of that repo.
    """
    @contextmanager
    def working_dir(path):
        """Temporarily switch the working directory.

        @param path: directory to work in; '~' is expanded.
        """
        original_dir = os.getcwd()
        os.chdir(os.path.expanduser(path))
        try:
            yield
        finally:
            os.chdir(original_dir)

    def head_of(path):
        """Return the output of `git rev-parse HEAD` run inside path.

        @param path: path of the repo.
        """
        with working_dir(path):
            output = subprocess.check_output('git rev-parse HEAD',
                                             shell=True)
        return output.strip()

    return dict((repo_name, head_of(path_to_repo))
                for repo_name, path_to_repo in repos.items())
550
551
def push_prod_next_branch(updated_repo_heads):
    """Move the prod-next branch of every repo to its tested HEAD and push it.

    The push command must be ran as PUSH_USER, since only PUSH_USER has the
    right to push branches.

    @param updated_repo_heads: a map of repo names to tested HEAD of that repo.
    """
    # prod-next branch for every repo is downloaded under PUSH_USER home dir.
    git_steps = ('cd ~/{repo}; git pull; git rebase {hash} prod-next;'
                 'git push origin prod-next')
    # The {repo} and {hash} placeholders are filled in per repo below.
    command_template = "sudo su - %s -c '%s'" % (PUSH_USER, git_steps)

    for repo_name, test_hash in updated_repo_heads.items():
        push_cmd = command_template.format(hash=test_hash, repo=repo_name)
        print('Pushing %s prod-next branch to %s' % (repo_name, test_hash))
        push_output = subprocess.check_output(
                push_cmd, stderr=subprocess.STDOUT, shell=True)
        print(push_output)
570
571
def _main(arguments):
    """Running tests.

    Runs the push_to_prod suite and an asynchronous dummy suite in parallel
    child processes, and pushes the prod-next branches once both succeed. A
    completion metric is reported and all DUTs are reverified whether the
    test push succeeds or not.

    @param arguments: command line arguments.
    @raise Exception: any exception raised by the checks or reported by a
                      child process is re-raised after cleanup.
    """
    # These module-level names are what do_run_suite appends to. They must be
    # rebound as globals here: a plain assignment would only create locals,
    # the child processes would append to their own private lists, and the
    # abort loop below would never see any suite id.
    global _run_suite_output
    global _all_suite_ids

    # TODO Use chromite.lib.parallel.Manager instead, to workaround the
    # too-long-tmp-path problem.
    mpmanager = multiprocessing.Manager()

    # NOTE(review): child processes see these proxies only if they are
    # started after this point and inherit module state (fork) - confirm.
    _run_suite_output = mpmanager.list()
    _all_suite_ids = mpmanager.list()

    updated_repo_heads = get_head_of_repos(UPDATED_REPOS)
    updated_repo_msg = '\n'.join(
            ['%s: %s' % (k, v) for k, v in updated_repo_heads.items()])
    test_push_success = False

    try:
        # Use daemon flag will kill child processes when parent process fails.
        use_daemon = not arguments.continue_on_failure
        # Verify all the DUTs at the beginning of testing push.
        reverify_all_push_duts()
        time.sleep(15) # Wait 15 secs for the verify test to start.
        check_dut_inventory(arguments.num_duts, arguments.pool)
        queue = multiprocessing.Queue()

        push_to_prod_suite = multiprocessing.Process(
                target=test_suite_wrapper,
                args=(queue, PUSH_TO_PROD_SUITE, EXPECTED_TEST_RESULTS,
                      arguments))
        push_to_prod_suite.daemon = use_daemon
        push_to_prod_suite.start()

        # suite test with --create_and_return flag
        asynchronous_suite = multiprocessing.Process(
                target=test_suite_wrapper,
                args=(queue, DUMMY_SUITE, EXPECTED_TEST_RESULTS_DUMMY,
                      arguments, True, True))
        asynchronous_suite.daemon = True
        asynchronous_suite.start()

        # Poll for exceptions reported by the children while they run.
        while (push_to_prod_suite.is_alive()
               or asynchronous_suite.is_alive()):
            check_queue(queue)
            time.sleep(5)

        # Pick up an exception reported right before the children exited.
        check_queue(queue)

        push_to_prod_suite.join()
        asynchronous_suite.join()

        # All tests pass, push prod-next branch for UPDATED_REPOS.
        push_prod_next_branch(updated_repo_heads)
        test_push_success = True
    except Exception as e:
        print('Test for pushing to prod failed:\n')
        print(str(e))
        # Abort running jobs when choose not to continue when there is failure.
        if not arguments.continue_on_failure:
            for suite_id in _all_suite_ids:
                if AFE.get_jobs(id=suite_id, finished=False):
                    AFE.run('abort_host_queue_entries', job=suite_id)
        raise
    finally:
        metrics.Counter('chromeos/autotest/test_push/completed').increment(
                fields={'success': test_push_success})
        # Reverify all the hosts
        reverify_all_push_duts()

    message = ('\nAll tests completed successfully, the prod branch of the '
               'following repos is ready to be pushed to the hash list below.\n'
               '%s\n\n\nInstructions for pushing to prod are available at '
               'https://goto.google.com/autotest-to-prod ' % updated_repo_msg)
    print(message)
Dan Shi7e04fa82013-07-25 15:08:48 -0700647
648
def main():
    """Parse the command line and run the test push with ts_mon set up.

    @return: whatever _main returns.
    """
    parsed_arguments = parse_arguments()
    ts_mon_state = ts_mon_config.SetupTsMonGlobalState(
            service_name='test_push', indirect=True)
    with ts_mon_state:
        return _main(parsed_arguments)


if __name__ == '__main__':
    sys.exit(main())