blob: 2118c9eef7190906418d7cfa6a09dfce935666d6 [file] [log] [blame]
Dan Shi7e04fa82013-07-25 15:08:48 -07001#!/usr/bin/python
2#
3# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
4# Use of this source code is governed by a BSD-style license that can be
5# found in the LICENSE file.
6
7"""Tool to validate code in prod branch before pushing to lab.
8
9The script runs push_to_prod suite to verify code in prod branch is ready to be
10pushed. Link to design document:
11https://docs.google.com/a/google.com/document/d/1JMz0xS3fZRSHMpFkkKAL_rxsdbNZomhHbC3B8L71uuI/edit
12
To verify whether the prod branch can be pushed to the lab, run the following
command on the chromeos-autotest.cbf server:
Michael Liang52d9f1f2014-06-17 15:01:24 -070015/usr/local/autotest/site_utils/test_push.py -e someone@company.com
Dan Shi7e04fa82013-07-25 15:08:48 -070016
The script uses the latest gandof stable build as the test build by default.
Dan Shi7e04fa82013-07-25 15:08:48 -070018
19"""
20
21import argparse
Shuqian Zhao1f311c02016-09-01 19:30:54 -070022import ast
Shuqian Zhao7b2daea2016-10-25 13:31:06 -070023from contextlib import contextmanager
Dan Shi7e04fa82013-07-25 15:08:48 -070024import getpass
Dan Shief1a5c02015-04-07 17:37:09 -070025import multiprocessing
Dan Shi7e04fa82013-07-25 15:08:48 -070026import os
27import re
28import subprocess
29import sys
Dan Shief1a5c02015-04-07 17:37:09 -070030import time
31import traceback
Dan Shi7e04fa82013-07-25 15:08:48 -070032import urllib2
33
34import common
Dan Shia8da7602014-05-09 15:18:15 -070035try:
36 from autotest_lib.frontend import setup_django_environment
37 from autotest_lib.frontend.afe import models
Shuqian Zhao327b6952016-09-12 10:42:03 -070038 from autotest_lib.frontend.afe import rpc_utils
Dan Shia8da7602014-05-09 15:18:15 -070039except ImportError:
40 # Unittest may not have Django database configured and will fail to import.
41 pass
Dan Shi5fa602c2015-03-26 17:54:13 -070042from autotest_lib.client.common_lib import global_config
Shuqian Zhao327b6952016-09-12 10:42:03 -070043from autotest_lib.client.common_lib import priorities
Shuqian Zhao6fc7bf42016-12-11 19:10:36 -080044from autotest_lib.client.common_lib.cros import retry
Dan Shi7e04fa82013-07-25 15:08:48 -070045from autotest_lib.server import site_utils
Shuqian Zhao327b6952016-09-12 10:42:03 -070046from autotest_lib.server import utils
Dan Shi47d32882014-12-22 16:25:05 -080047from autotest_lib.server.cros import provision
Dan Shi7e04fa82013-07-25 15:08:48 -070048from autotest_lib.server.cros.dynamic_suite import frontend_wrappers
Dan Shi5fa602c2015-03-26 17:54:13 -070049from autotest_lib.site_utils import gmail_lib
Dan Shi47d32882014-12-22 16:25:05 -080050from autotest_lib.site_utils.suite_scheduler import constants
Dan Shi7e04fa82013-07-25 15:08:48 -070051
# Root directory of the autotest checkout, as exposed by the `common` module.
AUTOTEST_DIR = common.autotest_dir
# Handle on the global configuration; the constants below read their values
# from it by (section, key).
CONFIG = global_config.global_config

# Module-wide RPC clients shared by every helper in this file.
# NOTE(review): timeout_min/delay_sec presumably bound the retry window and
# the pause between retries -- confirm against frontend_wrappers.
AFE = frontend_wrappers.RetryingAFE(timeout_min=0.5, delay_sec=2)
TKO = frontend_wrappers.RetryingTKO(timeout_min=0.1, delay_sec=10)
Dan Shiefd403e2016-02-03 11:37:02 -080057
MAIL_FROM = 'chromeos-test@google.com'
# Matches a build version such as R36-5881.0.0. Written as a raw string so
# that the regex escapes (\d, \.) are not interpreted as string escapes.
BUILD_REGEX = r'R[\d]+-[\d]+\.[\d]+\.[\d]+'
# File name of the suite runner; do_run_suite looks it up next to this script.
RUN_SUITE_COMMAND = 'run_suite.py'
# Names of the suites exercised by the push test.
PUSH_TO_PROD_SUITE = 'push_to_prod'
DUMMY_SUITE = 'dummy'
AU_SUITE = 'paygen_au_beta'
TESTBED_SUITE = 'testbed_push'
# TODO(shuqianz): Dynamically get android build after crbug.com/646068 fixed
# NOTE(review): no Android build constant follows this TODO in the code below;
# it may be a leftover -- confirm before relying on it.
# Default of the --timeout_min option, in minutes.
DEFAULT_TIMEOUT_MIN_FOR_SUITE_JOB = 30
IMAGE_BUCKET = CONFIG.get_config_value('CROS', 'image_storage_server')
# Default of the --email option; empty string when the config has no value.
DEFAULT_EMAIL = CONFIG.get_config_value(
        'SCHEDULER', 'notify_email', type=str, default='')
# Default of the --num_duts option. Kept as the string form of a dict because
# parse_arguments converts the option value with ast.literal_eval.
DEFAULT_NUM_DUTS = "{'gandof': 4, 'quawks': 2, 'testbed': 1}"
Dan Shi7e04fa82013-07-25 15:08:48 -070071
# Matches the run_suite output line that announces the suite job and captures
# the job id from the trailing object_id=<digits>. Raw strings keep the regex
# escape \d from being treated as a string escape.
SUITE_JOB_START_INFO_REGEX = (r'^.*Created suite job:.*'
                              r'tab_id=view_job&object_id=(\d+)$')
Dan Shi7e04fa82013-07-25 15:08:48 -070074
# Expected results per suite. Each dictionary maps a regular expression that
# is searched for in the test name to the status that test must report.

# Expected results of the push_to_prod suite.
EXPECTED_TEST_RESULTS = {
    '^SERVER_JOB$': 'GOOD',
    # This is related to dummy_Fail/control.dependency.
    'dummy_Fail.dependency$': 'TEST_NA',
    'login_LoginSuccess.*': 'GOOD',
    'provision_AutoUpdate.double': 'GOOD',
    'dummy_Pass.*': 'GOOD',
    'dummy_Fail.Fail$': 'FAIL',
    'dummy_Fail.RetryFail$': 'FAIL',
    'dummy_Fail.RetrySuccess': 'GOOD',
    'dummy_Fail.Error$': 'ERROR',
    'dummy_Fail.Warn$': 'WARN',
    'dummy_Fail.NAError$': 'TEST_NA',
    'dummy_Fail.Crash$': 'GOOD',
}

# Expected results of the dummy suite.
EXPECTED_TEST_RESULTS_DUMMY = {
    '^SERVER_JOB$': 'GOOD',
    'dummy_Pass.*': 'GOOD',
    'dummy_Fail.Fail': 'FAIL',
    'dummy_Fail.Warn': 'WARN',
    'dummy_Fail.Crash': 'GOOD',
    'dummy_Fail.Error': 'ERROR',
    'dummy_Fail.NAError': 'TEST_NA',
}

# Expected results of the AU suite.
EXPECTED_TEST_RESULTS_AU = {
    'SERVER_JOB$': 'GOOD',
    'autoupdate_EndToEndTest.paygen_au_beta_delta.*': 'GOOD',
    'autoupdate_EndToEndTest.paygen_au_beta_full.*': 'GOOD',
}

# Expected results of the testbed suite.
EXPECTED_TEST_RESULTS_TESTBED = {
    '^SERVER_JOB$': 'GOOD',
    'testbed_DummyTest': 'GOOD',
}

# Expected results of the powerwash job used to exercise the repair flow.
EXPECTED_TEST_RESULTS_POWERWASH = {
    'platform_Powerwash': 'GOOD',
    'SERVER_JOB': 'GOOD',
}
109
# Host name and URL template for job logs. verify_test_results builds the log
# link as URL_PATTERN % (URL_HOST, job_name) and checks that it can be loaded.
URL_HOST = CONFIG.get_config_value('SERVER', 'hostname', type=str)
URL_PATTERN = CONFIG.get_config_value('CROS', 'log_url_pattern', type=str)
112
# Some test could be missing from the test results for various reasons. Add
# such test in this list and explain the reason.
# verify_test_results removes each entry from the set of missing expected-
# result keys by exact string equality, so an entry only takes effect when it
# is spelled exactly like a key of the expected-results dictionary in use.
# NOTE(review): 'login_LoginSuccess' below is not equal to the key
# 'login_LoginSuccess.*' used in EXPECTED_TEST_RESULTS, so that entry
# currently has no effect -- decide whether to drop it or align the spelling.
IGNORE_MISSING_TESTS = [
    # For latest build, npo_test_delta does not exist.
    'autoupdate_EndToEndTest.npo_test_delta.*',
    # For trybot build, nmo_test_delta does not exist.
    'autoupdate_EndToEndTest.nmo_test_delta.*',
    # Older build does not have login_LoginSuccess test in push_to_prod suite.
    # TODO(dshi): Remove following lines after R41 is stable.
    'login_LoginSuccess']
123
# Save all run_suite command output.
# The lists come from a multiprocessing.Manager because do_run_suite appends
# to them from the child processes that main starts for each suite.
manager = multiprocessing.Manager()
# Every line echoed from run_suite, in the order it was read.
run_suite_output = manager.list()
# Job id of every suite job that was started.
all_suite_ids = manager.list()
# A dict maps the name of the updated repos and the path of them.
UPDATED_REPOS = {'autotest': AUTOTEST_DIR,
                 'chromite': '%s/site-packages/chromite/' % AUTOTEST_DIR}
# Account that push_prod_next_branch switches to (sudo su -) for the git push.
PUSH_USER = 'chromeos-test-lab'
Dan Shi7e04fa82013-07-25 15:08:48 -0700132
class TestPushException(Exception):
    """Error raised when the push-to-prod validation fails.

    Raised by the helpers in this file when a check does not pass, e.g. not
    enough DUTs, a suite job that timed out, or unexpected test results.
    """
136
Dan Shi5ba5d2e2014-05-09 13:47:00 -0700137
@retry.retry(TestPushException, timeout_min=5, delay_sec=30)
def check_dut_inventory(required_num_duts):
    """Check DUT inventory for each board.

    Asks the AFE for all unlocked hosts in Ready status, counts them per
    platform and compares the counts with the requirement. Every platform
    that falls short is listed in the raised error.

    NOTE(review): the retry decorator presumably re-runs this check on
    TestPushException for up to 5 minutes, 30 seconds apart -- confirm
    against retry.retry.

    @param required_num_duts: a dict specifying the number of DUT each platform
                              requires in order to finish push tests.
    @raise TestPushException: if number of DUTs are less than the requirement.
    """
    print('Checking DUT inventory...')
    hosts = AFE.run('get_hosts', status='Ready', locked=False)

    # Tally hosts per platform in one pass instead of calling list.count()
    # once per host, which is quadratic in the number of hosts.
    current_inventory = {}
    for host in hosts:
        platform = host['platform']
        current_inventory[platform] = current_inventory.get(platform, 0) + 1

    shortages = []
    for platform, req_num in required_num_duts.items():
        curr_num = current_inventory.get(platform, 0)
        if curr_num < req_num:
            shortages.append('\nRequire %d %s DUTs, only %d are Ready now' %
                             (req_num, platform, curr_num))
    if shortages:
        raise TestPushException('Not enough DUTs to run push tests. %s' %
                                ''.join(shortages))
159
160
def powerwash_dut_to_test_repair(hostname, timeout):
    """Powerwash dut to test repair workflow.

    Creates a platform_Powerwash server job on the given host, polls TKO
    every 10 seconds until the job has test statuses, verifies them against
    EXPECTED_TEST_RESULTS_POWERWASH and finally asks the AFE to reverify the
    host.

    @param hostname: hostname of the dut.
    @param timeout: seconds of the powerwash test to hit timeout.
    @raise TestPushException: if DUT fail to run the test.
    """
    powerwash_test = models.Test.objects.get(name='platform_Powerwash')
    control_path = os.path.join(common.autotest_dir, powerwash_test.path)
    control_file = utils.read_file(control_path)
    job_id = rpc_utils.create_job_common(
            'powerwash', priority=priorities.Priority.SUPER,
            control_type='Server', control_file=control_file,
            hosts=[hostname])

    deadline = time.time() + timeout
    while True:
        if TKO.get_job_test_statuses_from_db(job_id):
            break
        if time.time() >= deadline:
            # Give up: abort the job before reporting the timeout.
            AFE.run('abort_host_queue_entries', job=job_id)
            raise TestPushException(
                    'Powerwash test on %s timeout after %ds, abort it.' %
                    (hostname, timeout))
        time.sleep(10)

    verify_test_results(job_id, EXPECTED_TEST_RESULTS_POWERWASH)
    # Kick off verify, verify will fail and a repair should be triggered.
    AFE.reverify_hosts(hostnames=[hostname])
Kevin Cheng6e4c2642015-12-11 09:45:57 -0800185
186
def reverify_all_push_duts(pool):
    """Schedule a reverify for every DUT in the given pool.

    The hosts are looked up in the AFE by the pool label, i.e. the pool
    label prefix followed by the pool name.

    @param pool: Name of the pool used by test_push.
    """
    print('Reverifying DUTs in pool %s' % pool)
    hosts_in_pool = AFE.get_hosts(label=constants.Labels.POOL_PREFIX + pool)
    AFE.reverify_hosts(hostnames=[dut.hostname for dut in hosts_in_pool])
196
197
def get_default_build(board='gandof', server='chromeos-autotest.hot'):
    """Get the default build to be used for test.

    Runs `atest stable_version list` against the given web server and picks
    the first version matching BUILD_REGEX from its output. When the command
    cannot be run, fails, or prints no version, the stable version defined
    in the global config is used instead.

    @param board: Name of board to be tested, default is gandof.
    @param server: Web server to query for the stable version.
    @return: Build to be tested, e.g., gandof-release/R36-5881.0.0
    """
    # Pass an argument list rather than a shell string so that board and
    # server, which come from the command line, are never parsed by a shell.
    cmd = ['%s/cli/atest' % AUTOTEST_DIR, 'stable_version', 'list',
           '--board=%s' % board, '-w', server]
    try:
        result = subprocess.check_output(cmd).strip()
    except (OSError, subprocess.CalledProcessError) as e:
        print('Failed to get stable version of %s from %s: %s' %
              (board, server, e))
        result = ''

    match = re.search(BUILD_REGEX, result)
    if match:
        return '%s-release/%s' % (board, match.group(0))

    # If fail to get stable version from cautotest, use that defined in config
    build = CONFIG.get_config_value('CROS', 'stable_cros_version')
    return '%s-release/%s' % (board, build)
215
def parse_arguments():
    """Parse arguments for test_push tool.

    When --build or --shard_build is not given, the stable build of the
    corresponding board is looked up with get_default_build, using the web
    server given by --web. The --num_duts string is converted to a Python
    object with ast.literal_eval.

    @return: Parsed arguments.

    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-b', '--board', dest='board', default='gandof',
                        help='Default is gandof.')
    parser.add_argument('-sb', '--shard_board', dest='shard_board',
                        default='quawks',
                        help='Default is quawks.')
    parser.add_argument('-i', '--build', dest='build', default=None,
                        help='Default is the latest stable build of given '
                             'board. Must be a stable build, otherwise AU test '
                             'will fail. (ex: gandof-release/R54-8743.25.0)')
    parser.add_argument('-si', '--shard_build', dest='shard_build', default=None,
                        help='Default is the latest stable build of given '
                             'board. Must be a stable build, otherwise AU test '
                             'will fail.')
    parser.add_argument('-w', '--web', default='chromeos-autotest.hot',
                        help='Specify web server to grab stable version from.')
    parser.add_argument('-ab', '--android_board', dest='android_board',
                        default='shamu-2', help='Android board to test.')
    parser.add_argument('-ai', '--android_build', dest='android_build',
                        help='Android build to test.')
    parser.add_argument('-p', '--pool', dest='pool', default='bvt')
    parser.add_argument('-u', '--num', dest='num', type=int, default=3,
                        help='Run on at most NUM machines.')
    parser.add_argument('-e', '--email', dest='email', default=DEFAULT_EMAIL,
                        help='Email address for the notification to be sent to '
                             'after the script finished running.')
    parser.add_argument('-t', '--timeout_min', dest='timeout_min', type=int,
                        default=DEFAULT_TIMEOUT_MIN_FOR_SUITE_JOB,
                        help='Time in mins to wait before abort the jobs we '
                             'are waiting on. Only for the asynchronous suites '
                             'triggered by create_and_return flag.')
    parser.add_argument('-ud', '--num_duts', dest='num_duts',
                        default=DEFAULT_NUM_DUTS,
                        help="String of dict that indicates the required number"
                             " of DUTs for each board. E.g {'gandof':4}")
    parser.add_argument('-c', '--continue_on_failure', action='store_true',
                        dest='continue_on_failure',
                        help='All tests continue to run when there is failure')

    arguments = parser.parse_args(sys.argv[1:])

    # Convert the option first: a malformed --num_duts then fails right away
    # instead of after the stable-version lookups below.
    arguments.num_duts = ast.literal_eval(arguments.num_duts)

    # Get latest stable build as default build.
    if not arguments.build:
        arguments.build = get_default_build(arguments.board, arguments.web)
    if not arguments.shard_build:
        arguments.shard_build = get_default_build(arguments.shard_board,
                                                  arguments.web)

    return arguments
273
274
def do_run_suite(suite_name, arguments, use_shard=False,
                 create_and_return=False, testbed_test=False):
    """Call run_suite to run a suite job, and return the suite job id.

    The script waits the suite job to finish before returning the suite job id.
    Also it will echo the run_suite output to stdout.

    Before starting the suite, the version labels are removed from all
    unlocked hosts of the board to force provision. For a synchronous suite
    on the shard board, one of those hosts is additionally powerwashed to
    exercise the repair workflow.

    @param suite_name: Name of a suite, e.g., dummy.
    @param arguments: Arguments for run_suite command.
    @param use_shard: If true, suite is scheduled for shard board.
    @param create_and_return: If True, run_suite just creates the suite, print
                              the job id, then finish immediately.
    @param testbed_test: True to run testbed test. Default is False.

    @return: Suite job ID.
    @raise TestPushException: If no host is available for the powerwash test,
                              the suite job ID can not be found in the
                              run_suite output, or an asynchronous suite does
                              not finish within arguments.timeout_min.

    """
    if use_shard and not testbed_test:
        board = arguments.shard_board
        build = arguments.shard_build
    elif testbed_test:
        board = arguments.android_board
        build = arguments.android_build
    else:
        board = arguments.board
        build = arguments.build

    # Remove cros-version label to force provision.
    hosts = AFE.get_hosts(label=constants.Labels.BOARD_PREFIX+board,
                          locked=False)
    # Stays None when no host matches, so the powerwash step below can tell
    # that there is nothing to run on.
    host = None
    for host in hosts:
        labels_to_remove = [
                l for l in host.labels
                if (l.startswith(provision.CROS_VERSION_PREFIX) or
                    l.startswith(provision.TESTBED_BUILD_VERSION_PREFIX))]
        if labels_to_remove:
            AFE.run('host_remove_labels', id=host.id, labels=labels_to_remove)

    # Test repair work flow on shards, powerwash test will timeout after 7m.
    if use_shard and not create_and_return:
        if host is None:
            raise TestPushException(
                    'No unlocked host found for board %s to run the '
                    'powerwash test.' % board)
        # The last host of the list is the one that gets powerwashed.
        powerwash_dut_to_test_repair(host.hostname, timeout=420)

    current_dir = os.path.dirname(os.path.realpath(__file__))
    cmd = [os.path.join(current_dir, RUN_SUITE_COMMAND),
           '-s', suite_name,
           '-b', board,
           '-i', build,
           '-p', arguments.pool,
           '-u', str(arguments.num)]
    if create_and_return:
        cmd += ['-c']
    if testbed_test:
        cmd += ['--run_prod_code']

    suite_job_id = None

    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                            stderr=subprocess.STDOUT)

    while True:
        line = proc.stdout.readline()

        if not line:
            # End of output: wait for run_suite to exit instead of echoing
            # empty lines until poll() reports completion.
            proc.wait()
            break
        print(line.rstrip())
        run_suite_output.append(line.rstrip())

        if not suite_job_id:
            m = re.match(SUITE_JOB_START_INFO_REGEX, line)
            if m and m.group(1):
                suite_job_id = int(m.group(1))
                all_suite_ids.append(suite_job_id)

    if not suite_job_id:
        raise TestPushException('Failed to retrieve suite job ID.')

    # If create_and_return specified, wait for the suite to finish.
    if create_and_return:
        end = time.time() + arguments.timeout_min * 60
        while not AFE.get_jobs(id=suite_job_id, finished=True):
            if time.time() < end:
                time.sleep(10)
            else:
                AFE.run('abort_host_queue_entries', job=suite_job_id)
                raise TestPushException(
                        'Asynchronous suite triggered by create_and_return '
                        'flag has timed out after %d mins. Aborting it.' %
                        arguments.timeout_min)

    print('Suite job %s is completed.' % suite_job_id)
    return suite_job_id
367
368
def check_dut_image(build, suite_job_id):
    """Confirm all DUTs used for the suite are imaged to expected build.

    The DUTs are found through the first host queue entry of every child
    job of the suite job; the build of each DUT is then read from the AFE.

    @param build: Expected build to be imaged.
    @param suite_job_id: job ID of the suite job.
    @raise TestPushException: If a DUT does not have expected build imaged.
    """
    print('Checking image installed in DUTs...')
    child_job_ids = [
            child.id for child in
            models.Job.objects.filter(parent_job_id=suite_job_id)]
    first_entries = [
            models.HostQueueEntry.objects.filter(job_id=child_id)[0]
            for child_id in child_job_ids]
    hostnames = set([entry.host.hostname for entry in first_entries])
    for hostname in hostnames:
        found_build = site_utils.get_build_from_afe(hostname, AFE)
        if found_build == build:
            continue
        raise TestPushException('DUT is not imaged properly. Host %s has '
                                'build %s, while build %s is expected.' %
                                (hostname, found_build, build))
388
389
def test_suite(suite_name, expected_results, arguments, use_shard=False,
               create_and_return=False, testbed_test=False):
    """Run a suite through run_suite and verify its outcome.

    After the suite job is done, the image on the DUTs is checked (unless the
    suite is the AU suite, runs on a shard, or is a testbed test) and the
    test results are compared with the expected ones.

    @param suite_name: Name of a suite, e.g., dummy
    @param expected_results: A dictionary of test name to test result.
    @param arguments: Arguments for run_suite command.
    @param use_shard: If true, suite is scheduled for shard board.
    @param create_and_return: If True, run_suite just creates the suite, print
                              the job id, then finish immediately.
    @param testbed_test: True to run testbed test. Default is False.
    """
    suite_job_id = do_run_suite(suite_name, arguments, use_shard,
                                create_and_return, testbed_test)

    # Confirm all DUTs used for the suite are imaged to expected build.
    # hqe.host_id for jobs running in shard is not synced back to master db,
    # therefore, skip verifying dut build for jobs running in shard.
    if testbed_test:
        build_expected = arguments.android_build
    else:
        build_expected = arguments.build
    skip_image_check = suite_name == AU_SUITE or use_shard or testbed_test
    if not skip_image_check:
        check_dut_image(build_expected, suite_job_id)

    # Verify test results are the expected results.
    verify_test_results(suite_job_id, expected_results)
415
416
def verify_test_results(job_id, expected_results):
    """Compare the test results of a job with the expected results.

    Three kinds of problems are collected: tests whose status differs from
    the expected one, tests that match no expected entry, and expected
    entries that matched no test (minus those listed in
    IGNORE_MISSING_TESTS). The link to the job log is also checked to be
    loadable. Any problem found is reported in a single exception.

    @param job_id: id of the running jobs. For suite job, it is suite_job_id.
    @param expected_results: A dictionary of test name to test result.
    @raise TestPushException: If verify fails.
    """
    print('Comparing test results...')
    test_views = site_utils.get_test_views_from_tko(job_id, TKO)

    mismatch_errors = []
    extra_test_errors = []
    found_keys = set()

    for test_name, test_status in test_views.items():
        print("%s%s" % (test_name.ljust(30), test_status))
        # platform_InstallTestImage test may exist in old builds.
        if re.search('platform_InstallTestImage_SERVER_JOB$', test_name):
            continue

        # Every expected entry whose pattern is found in the test name.
        matching_keys = [pattern for pattern in expected_results
                         if re.search(pattern, test_name)]
        if not matching_keys:
            extra_test_errors.append(test_name)
            continue

        found_keys.update(matching_keys)
        for pattern in matching_keys:
            expected_status = expected_results[pattern]
            if expected_status != test_status:
                mismatch_errors.append(
                        '%s Expected: [%s], Actual: [%s]' %
                        (test_name, expected_status, test_status))

    # Expected entries that no test matched, except the ignorable ones.
    missing_test_errors = set(expected_results.keys()) - found_keys
    missing_test_errors.difference_update(IGNORE_MISSING_TESTS)

    sections = [
            (('Results of %d test(s) do not match expected '
              'values:'), mismatch_errors),
            ('%d test(s) are not expected to be run:', extra_test_errors),
            ('%d test(s) are missing from the results:', missing_test_errors),
    ]
    summary = []
    for header, problems in sections:
        if problems:
            summary.append(header % len(problems))
            summary.extend(problems)
            summary.append('\n')

    # Test link to log can be loaded.
    job_name = '%s-%s' % (job_id, getpass.getuser())
    log_link = URL_PATTERN % (URL_HOST, job_name)
    try:
        urllib2.urlopen(log_link).read()
    except urllib2.URLError:
        summary.append('Failed to load page for link to log: %s.' % log_link)

    if summary:
        raise TestPushException('\n'.join(summary))
484
485
def test_suite_wrapper(queue, suite_name, expected_results, arguments,
                       use_shard=False, create_and_return=False,
                       testbed_test=False):
    """Run test_suite and report any exception through the queue.

    Meant to be the target of a child process: whatever test_suite raises
    is put on the queue so that the parent process can pick it up.

    @param queue: Queue to save exception to be accessed by parent process.
    @param suite_name: Name of a suite, e.g., dummy
    @param expected_results: A dictionary of test name to test result.
    @param arguments: Arguments for run_suite command.
    @param use_shard: If true, suite is scheduled for shard board.
    @param create_and_return: If True, run_suite just creates the suite, print
                              the job id, then finish immediately.
    @param testbed_test: True to run testbed test. Default is False.
    """
    try:
        test_suite(suite_name, expected_results, arguments,
                   use_shard=use_shard,
                   create_and_return=create_and_return,
                   testbed_test=testbed_test)
    except:
        # Store the whole exc_info leads to a PicklingError, so the traceback
        # object is replaced by its extracted entries.
        failure_type, failure_value, failure_tb = sys.exc_info()
        failure = (failure_type, failure_value,
                   traceback.extract_tb(failure_tb))
        queue.put(failure)
508
509
def check_queue(queue):
    """Check the queue for any exception being raised.

    Takes at most one entry off the queue, prints the stack trace recorded
    with it and raises the exception again in the calling process.

    @param queue: Queue used to store exception for parent process to access.
                  Each entry is a tuple of (exception type, exception value,
                  extracted traceback entries), as stored by
                  test_suite_wrapper.
    @raise: Any exception found in the queue.
    """
    if queue.empty():
        return
    exc_type, exc_value, extracted_tb = queue.get()
    # Print the original backtrace in the usual traceback layout.
    print('Original stack trace of the exception:\n%s' %
          ''.join(traceback.format_list(extracted_tb)))
    if isinstance(exc_value, BaseException):
        # Raise the stored instance itself: building a new one from a single
        # argument fails for exception classes whose constructor requires
        # more than one argument.
        raise exc_value
    raise exc_type(exc_value)
522
523
def get_head_of_repos(repos):
    """Get HEAD of updated repos, currently are autotest and chromite repos

    Runs `git rev-parse HEAD` in each repo directory. The directory is
    passed to the git process itself, so the working directory of this
    process is never changed.

    @param repos: a map of repo name to the path of the repo. E.g.
                  {'autotest': '/usr/local/autotest'}
    @return: a map of repo names to the current HEAD of that repo.
    """
    updated_repo_heads = {}
    for repo_name, path_to_repo in repos.items():
        head = subprocess.check_output(
                ['git', 'rev-parse', 'HEAD'],
                cwd=os.path.expanduser(path_to_repo)).strip()
        updated_repo_heads[repo_name] = head
    return updated_repo_heads
551
552
def push_prod_next_branch(updated_repo_heads):
    """Move the prod-next branch of every repo to its tested HEAD and push it.

    The push command must be ran as PUSH_USER, since only PUSH_USER has the
    right to push branches. The output of each push is printed.

    @param updated_repo_heads: a map of repo names to tested HEAD of that repo.
    """
    # prod-next branch for every repo is downloaded under PUSH_USER home dir.
    git_steps = ('cd ~/{repo}; git pull; git rebase {hash} prod-next;'
                 'git push origin prod-next')
    # {repo} and {hash} are filled in per repo in the loop below.
    command_template = "sudo su - %s -c '%s'" % (PUSH_USER, git_steps)

    for repo_name, test_hash in updated_repo_heads.items():
        push_cmd = command_template.format(repo=repo_name, hash=test_hash)
        print('Pushing %s prod-next branch to %s' % (repo_name, test_hash))
        push_output = subprocess.check_output(
                push_cmd, stderr=subprocess.STDOUT, shell=True)
        print(push_output)
571
572
Dan Shi7e04fa82013-07-25 15:08:48 -0700573def main():
574 """Entry point for test_push script."""
575 arguments = parse_arguments()
Shuqian Zhao80d32712016-11-11 16:37:36 -0800576 updated_repo_heads = get_head_of_repos(UPDATED_REPOS)
577 updated_repo_msg = '\n'.join(
578 ['%s: %s' % (k, v) for k, v in updated_repo_heads.iteritems()])
Dan Shi7e04fa82013-07-25 15:08:48 -0700579
580 try:
Shuqian Zhao676ed6f2016-09-21 14:20:50 -0700581 # Use daemon flag will kill child processes when parent process fails.
582 use_daemon = not arguments.continue_on_failure
Shuqian Zhao6fc7bf42016-12-11 19:10:36 -0800583 # Verify all the DUTs at the beginning of testing push.
Shuqian Zhaof794c492017-01-06 16:27:23 -0800584 reverify_all_push_duts(arguments.pool)
Shuqian Zhao6fc7bf42016-12-11 19:10:36 -0800585 time.sleep(15) # Wait 15 secs for the verify test to start.
Shuqian Zhao1f311c02016-09-01 19:30:54 -0700586 check_dut_inventory(arguments.num_duts)
Dan Shief1a5c02015-04-07 17:37:09 -0700587 queue = multiprocessing.Queue()
588
589 push_to_prod_suite = multiprocessing.Process(
590 target=test_suite_wrapper,
591 args=(queue, PUSH_TO_PROD_SUITE, EXPECTED_TEST_RESULTS,
592 arguments))
Shuqian Zhao676ed6f2016-09-21 14:20:50 -0700593 push_to_prod_suite.daemon = use_daemon
Dan Shief1a5c02015-04-07 17:37:09 -0700594 push_to_prod_suite.start()
Jakob Juelich8f143912014-10-10 14:08:05 -0700595
Dan Shi7e04fa82013-07-25 15:08:48 -0700596 # TODO(dshi): Remove following line after crbug.com/267644 is fixed.
597 # Also, merge EXPECTED_TEST_RESULTS_AU to EXPECTED_TEST_RESULTS
Shuqian Zhao7de04282016-09-23 09:14:13 -0700598 # AU suite will be on shard until crbug.com/634049 is fixed.
Dan Shief1a5c02015-04-07 17:37:09 -0700599 au_suite = multiprocessing.Process(
600 target=test_suite_wrapper,
601 args=(queue, AU_SUITE, EXPECTED_TEST_RESULTS_AU,
Shuqian Zhao7de04282016-09-23 09:14:13 -0700602 arguments, True))
Shuqian Zhao676ed6f2016-09-21 14:20:50 -0700603 au_suite.daemon = use_daemon
Dan Shief1a5c02015-04-07 17:37:09 -0700604 au_suite.start()
605
Shuqian Zhaod4864772015-08-06 09:46:22 -0700606 # suite test with --create_and_return flag
607 asynchronous_suite = multiprocessing.Process(
608 target=test_suite_wrapper,
609 args=(queue, DUMMY_SUITE, EXPECTED_TEST_RESULTS_DUMMY,
Shuqian Zhao7de04282016-09-23 09:14:13 -0700610 arguments, False, True))
Shuqian Zhao1b4ca272016-09-18 14:58:19 -0700611 asynchronous_suite.daemon = True
Shuqian Zhaod4864772015-08-06 09:46:22 -0700612 asynchronous_suite.start()
613
Dan Shic239c0a2017-03-01 21:50:54 +0000614 # Test suite for testbed
615 testbed_suite = multiprocessing.Process(
616 target=test_suite_wrapper,
617 args=(queue, TESTBED_SUITE, EXPECTED_TEST_RESULTS_TESTBED,
618 arguments, False, False, True))
619 testbed_suite.daemon = use_daemon
620 testbed_suite.start()
621
Dan Shief1a5c02015-04-07 17:37:09 -0700622 while (push_to_prod_suite.is_alive() or au_suite.is_alive() or
Dan Shic239c0a2017-03-01 21:50:54 +0000623 asynchronous_suite.is_alive() or testbed_suite.is_alive()):
Dan Shief1a5c02015-04-07 17:37:09 -0700624 check_queue(queue)
Dan Shief1a5c02015-04-07 17:37:09 -0700625 time.sleep(5)
626
627 check_queue(queue)
628
629 push_to_prod_suite.join()
630 au_suite.join()
Shuqian Zhaod4864772015-08-06 09:46:22 -0700631 asynchronous_suite.join()
Dan Shic239c0a2017-03-01 21:50:54 +0000632 testbed_suite.join()
Shuqian Zhao80d32712016-11-11 16:37:36 -0800633
634 # All tests pass, push prod-next branch for UPDATED_REPOS.
Shuqian Zhaoaa0301c2016-11-21 09:46:41 -0800635 push_prod_next_branch(updated_repo_heads)
Dan Shi7e04fa82013-07-25 15:08:48 -0700636 except Exception as e:
637 print 'Test for pushing to prod failed:\n'
638 print str(e)
Shuqian Zhao676ed6f2016-09-21 14:20:50 -0700639 # Abort running jobs when choose not to continue when there is failure.
640 if not arguments.continue_on_failure:
641 for suite_id in all_suite_ids:
642 if AFE.get_jobs(id=suite_id, finished=False):
643 AFE.run('abort_host_queue_entries', job=suite_id)
Dan Shi7e04fa82013-07-25 15:08:48 -0700644 # Send out email about the test failure.
645 if arguments.email:
Dan Shi5fa602c2015-03-26 17:54:13 -0700646 gmail_lib.send_email(
647 arguments.email,
648 'Test for pushing to prod failed. Do NOT push!',
Shuqian Zhao7b2daea2016-10-25 13:31:06 -0700649 ('Test CLs of the following repos failed. Below are the '
650 'repos and the corresponding test HEAD.\n\n%s\n\n.'
651 'Error occurred during test:\n\n%s\n\n' %
652 (updated_repo_msg, str(e)) + '\n'.join(run_suite_output)))
Dan Shi7e04fa82013-07-25 15:08:48 -0700653 raise
Shuqian Zhaof794c492017-01-06 16:27:23 -0800654 finally:
Shuqian Zhaod2a99f02016-09-22 13:31:30 -0700655 # Reverify all the hosts
Shuqian Zhaof794c492017-01-06 16:27:23 -0800656 reverify_all_push_duts(arguments.pool)
Dan Shi7e04fa82013-07-25 15:08:48 -0700657
Shuqian Zhao7b2daea2016-10-25 13:31:06 -0700658 message = ('\nAll tests are completed successfully, the prod branch of the '
659 'following repos ready to be pushed to the hash list below.\n'
Aviv Keshet51172b22017-01-30 16:28:57 -0800660 '%s\n\n\nInstructions for pushing to prod are available at '
661 'https://goto.google.com/autotest-to-prod' % updated_repo_msg)
Dan Shi7e04fa82013-07-25 15:08:48 -0700662 print message
663 # Send out email about test completed successfully.
664 if arguments.email:
Dan Shi5fa602c2015-03-26 17:54:13 -0700665 gmail_lib.send_email(
666 arguments.email,
667 'Test for pushing to prod completed successfully',
668 message)
Dan Shi7e04fa82013-07-25 15:08:48 -0700669
670
if __name__ == '__main__':
    # Run the push test when executed as a script; main()'s return value is
    # handed to sys.exit as the process exit status.
    exit_status = main()
    sys.exit(exit_status)