blob: 961d2756c97d9ca4a574046ef607e1ece677a7da [file] [log] [blame]
#!/usr/bin/python
#
# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Tool to validate code in prod branch before pushing to lab.

The script runs the push_to_prod suite to verify that the code in the prod
branch is ready to be pushed. Link to design document:
https://docs.google.com/a/google.com/document/d/1JMz0xS3fZRSHMpFkkKAL_rxsdbNZomhHbC3B8L71uuI/edit

To verify whether the prod branch can be pushed to the lab, run the following
command on the chromeos-autotest.cbf server:
/usr/local/autotest/site_utils/test_push.py -e someone@company.com

The script uses the latest gandof stable build as the test build by default.

"""
20
21import argparse
Shuqian Zhao1f311c02016-09-01 19:30:54 -070022import ast
Shuqian Zhao7b2daea2016-10-25 13:31:06 -070023from contextlib import contextmanager
Dan Shi7e04fa82013-07-25 15:08:48 -070024import getpass
Dan Shief1a5c02015-04-07 17:37:09 -070025import multiprocessing
Dan Shi7e04fa82013-07-25 15:08:48 -070026import os
27import re
28import subprocess
29import sys
Dan Shief1a5c02015-04-07 17:37:09 -070030import time
31import traceback
Dan Shi7e04fa82013-07-25 15:08:48 -070032import urllib2
33
34import common
Dan Shia8da7602014-05-09 15:18:15 -070035try:
36 from autotest_lib.frontend import setup_django_environment
37 from autotest_lib.frontend.afe import models
Shuqian Zhao327b6952016-09-12 10:42:03 -070038 from autotest_lib.frontend.afe import rpc_utils
Dan Shia8da7602014-05-09 15:18:15 -070039except ImportError:
40 # Unittest may not have Django database configured and will fail to import.
41 pass
Dan Shi5fa602c2015-03-26 17:54:13 -070042from autotest_lib.client.common_lib import global_config
Shuqian Zhao327b6952016-09-12 10:42:03 -070043from autotest_lib.client.common_lib import priorities
Dan Shi7e04fa82013-07-25 15:08:48 -070044from autotest_lib.server import site_utils
Shuqian Zhao327b6952016-09-12 10:42:03 -070045from autotest_lib.server import utils
Dan Shi47d32882014-12-22 16:25:05 -080046from autotest_lib.server.cros import provision
Dan Shi7e04fa82013-07-25 15:08:48 -070047from autotest_lib.server.cros.dynamic_suite import frontend_wrappers
Dan Shi5fa602c2015-03-26 17:54:13 -070048from autotest_lib.site_utils import gmail_lib
Dan Shi47d32882014-12-22 16:25:05 -080049from autotest_lib.site_utils.suite_scheduler import constants
Dan Shi7e04fa82013-07-25 15:08:48 -070050
AUTOTEST_DIR = common.autotest_dir
CONFIG = global_config.global_config

# Retrying RPC clients shared by every helper in this module.
AFE = frontend_wrappers.RetryingAFE(timeout_min=0.5, delay_sec=2)
TKO = frontend_wrappers.RetryingTKO(timeout_min=0.1, delay_sec=10)

MAIL_FROM = 'chromeos-test@google.com'
# Matches a build version such as R36-5881.0.0. Raw string so the regex
# escapes are not interpreted as (invalid) string escapes.
BUILD_REGEX = r'R[\d]+-[\d]+\.[\d]+\.[\d]+'
RUN_SUITE_COMMAND = 'run_suite.py'
PUSH_TO_PROD_SUITE = 'push_to_prod'
DUMMY_SUITE = 'dummy'
AU_SUITE = 'paygen_au_beta'
TESTBED_SUITE = 'testbed_push'
# TODO(shuqianz): Dynamically get android build after crbug.com/646068 fixed
DEFAULT_TIMEOUT_MIN_FOR_SUITE_JOB = 30
IMAGE_BUCKET = CONFIG.get_config_value('CROS', 'image_storage_server')
DEFAULT_EMAIL = CONFIG.get_config_value(
        'SCHEDULER', 'notify_email', type=str, default='')
# String form of a dict; parse_arguments converts it with ast.literal_eval.
DEFAULT_NUM_DUTS = "{'gandof': 4, 'quawks': 2, 'testbed': 2}"

# Matches the run_suite output line announcing the suite job; group 1 is the
# suite job id.
SUITE_JOB_START_INFO_REGEX = (r'^.*Created suite job:.*'
                              r'tab_id=view_job&object_id=(\d+)$')

# Dictionary of test results keyed by test name regular expression.
EXPECTED_TEST_RESULTS = {'^SERVER_JOB$': 'GOOD',
                         # This is related to dummy_Fail/control.dependency.
                         'dummy_Fail.dependency$': 'TEST_NA',
                         'login_LoginSuccess.*': 'GOOD',
                         'provision_AutoUpdate.double': 'GOOD',
                         'dummy_Pass.*': 'GOOD',
                         'dummy_Fail.Fail$': 'FAIL',
                         'dummy_Fail.RetryFail$': 'FAIL',
                         'dummy_Fail.RetrySuccess': 'GOOD',
                         'dummy_Fail.Error$': 'ERROR',
                         'dummy_Fail.Warn$': 'WARN',
                         'dummy_Fail.NAError$': 'TEST_NA',
                         'dummy_Fail.Crash$': 'GOOD',
                         }

EXPECTED_TEST_RESULTS_DUMMY = {'^SERVER_JOB$': 'GOOD',
                               'dummy_Pass.*': 'GOOD',
                               'dummy_Fail.Fail': 'FAIL',
                               'dummy_Fail.Warn': 'WARN',
                               'dummy_Fail.Crash': 'GOOD',
                               'dummy_Fail.Error': 'ERROR',
                               'dummy_Fail.NAError': 'TEST_NA',}

EXPECTED_TEST_RESULTS_AU = {'SERVER_JOB$': 'GOOD',
         'autoupdate_EndToEndTest.paygen_au_beta_delta.*': 'GOOD',
         'autoupdate_EndToEndTest.paygen_au_beta_full.*': 'GOOD',
         }

EXPECTED_TEST_RESULTS_TESTBED = {'^SERVER_JOB$': 'GOOD',
                                 'testbed_DummyTest': 'GOOD',}

EXPECTED_TEST_RESULTS_POWERWASH = {'platform_Powerwash': 'GOOD',
                                   'SERVER_JOB': 'GOOD'}

URL_HOST = CONFIG.get_config_value('SERVER', 'hostname', type=str)
URL_PATTERN = CONFIG.get_config_value('CROS', 'log_url_pattern', type=str)

# Some test could be missing from the test results for various reasons. Add
# such test in this list and explain the reason.
IGNORE_MISSING_TESTS = [
    # For latest build, npo_test_delta does not exist.
    'autoupdate_EndToEndTest.npo_test_delta.*',
    # For trybot build, nmo_test_delta does not exist.
    'autoupdate_EndToEndTest.nmo_test_delta.*',
    # Older build does not have login_LoginSuccess test in push_to_prod suite.
    # TODO(dshi): Remove following lines after R41 is stable.
    'login_LoginSuccess']

# Manager-backed lists so that the suite child processes and the parent
# process share the same run_suite output and suite job ids.
manager = multiprocessing.Manager()
# Save all run_suite command output.
run_suite_output = manager.list()
# Ids of every suite job created so far.
all_suite_ids = manager.list()
# Maps the name of each updated repo to the path of its checkout.
UPDATED_REPOS = {'autotest': AUTOTEST_DIR,
                 'chromite': '%s/site-packages/chromite/' % AUTOTEST_DIR}
# User that runs the prod-next push command (see push_prod_next_branch).
PUSH_USER = 'chromeos-test-lab'
Dan Shi7e04fa82013-07-25 15:08:48 -0700131
class TestPushException(Exception):
    """Error signalling that the push-to-prod validation failed."""
135
Dan Shi5ba5d2e2014-05-09 13:47:00 -0700136
def check_dut_inventory(required_num_duts):
    """Check DUT inventory for each platform.

    Only hosts that are in Ready status and not locked are counted.

    @param required_num_duts: a dict specifying the number of DUT each platform
                              requires in order to finish push tests.
    @raise TestPushException: if number of DUTs are less than the requirement.
    """
    hosts = AFE.run('get_hosts', status='Ready', locked=False)

    # Tally the hosts per platform in a single pass.
    current_inventory = {}
    for host in hosts:
        platform = host['platform']
        current_inventory[platform] = current_inventory.get(platform, 0) + 1

    # Sort by platform so the error message is stable from run to run.
    shortages = []
    for platform, req_num in sorted(required_num_duts.items()):
        curr_num = current_inventory.get(platform, 0)
        if curr_num < req_num:
            shortages.append('\nRequire %d %s DUTs, only %d are Ready now' %
                             (req_num, platform, curr_num))
    if shortages:
        raise TestPushException('Not enough DUTs to run push tests. %s' %
                                ''.join(shortages))
156
157
def powerwash_dut_to_test_repair(hostname, timeout):
    """Run platform_Powerwash on a DUT, then reverify it.

    A server job with the platform_Powerwash control file is created on the
    given host at SUPER priority. TKO is polled every 10 seconds until it
    reports test statuses for the job; the results are then checked against
    EXPECTED_TEST_RESULTS_POWERWASH and a reverify is requested for the host.

    @param hostname: hostname of the dut.
    @param timeout: seconds of the powerwash test to hit timeout.
    @raise TestPushException: if DUT fail to run the test.
    """
    powerwash_test = models.Test.objects.get(name='platform_Powerwash')
    control_path = os.path.join(common.autotest_dir, powerwash_test.path)
    control_file = utils.read_file(control_path)
    job_id = rpc_utils.create_job_common(
            'powerwash', priority=priorities.Priority.SUPER,
            control_type='Server', control_file=control_file,
            hosts=[hostname])

    deadline = time.time() + timeout
    while True:
        if TKO.get_job_test_statuses_from_db(job_id):
            break
        if time.time() >= deadline:
            # Give up: abort the job before reporting the timeout.
            AFE.run('abort_host_queue_entries', job=job_id)
            raise TestPushException(
                    'Powerwash test on %s timeout after %ds, abort it.' %
                    (hostname, timeout))
        time.sleep(10)

    verify_test_results(job_id, EXPECTED_TEST_RESULTS_POWERWASH)
    # Kick off verify, verify will fail and a repair should be triggered.
    AFE.reverify_hosts(hostnames=[hostname])
Kevin Cheng6e4c2642015-12-11 09:45:57 -0800182
183
def reverify_all_push_duts(pool):
    """Request a reverify for every host carrying the pool's label.

    @param pool: Name of the pool used by test_push.
    """
    hostnames = []
    for host in AFE.get_hosts(label=constants.Labels.POOL_PREFIX + pool):
        hostnames.append(host.hostname)
    AFE.reverify_hosts(hostnames=hostnames)
192
193
def get_default_build(board='gandof'):
    """Get the default build to be used for test.

    The stable version is queried with `atest stable_version list` against
    cautotest. If the command cannot be run, exits with an error, or prints
    nothing matching BUILD_REGEX, the `stable_cros_version` value from the
    CROS section of the global config is used instead.

    @param board: Name of board to be tested, default is gandof.
    @return: Build to be tested, e.g., gandof-release/R36-5881.0.0
    """
    # Argument list (no shell) so the board name is never shell-interpreted.
    cmd = [os.path.join(AUTOTEST_DIR, 'cli', 'atest'),
           'stable_version', 'list',
           '--board=%s' % board,
           '-w', 'cautotest']
    try:
        result = subprocess.check_output(cmd).strip()
    except (subprocess.CalledProcessError, OSError) as e:
        # A failing atest must not prevent the config fallback below.
        print('Failed to get stable version of %s from cautotest: %s' %
              (board, e))
        result = ''

    match = re.search(BUILD_REGEX, result)
    if match:
        return '%s-release/%s' % (board, match.group(0))

    # If fail to get stable version from cautotest, use that defined in config
    build = CONFIG.get_config_value('CROS', 'stable_cros_version')
    return '%s-release/%s' % (board, build)
211
def parse_arguments():
    """Parse arguments for test_push tool.

    Besides plain parsing, `num_duts` is converted from its string form to a
    dict, and `build` / `shard_build` are filled in with the default build of
    their board when not given on the command line. An invalid `--num_duts`
    value makes the parser report a usage error and exit.

    @return: Parsed arguments.

    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-b', '--board', dest='board', default='gandof',
                        help='Default is gandof.')
    parser.add_argument('-sb', '--shard_board', dest='shard_board',
                        default='quawks',
                        help='Default is quawks.')
    parser.add_argument('-i', '--build', dest='build', default=None,
                        help='Default is the latest stable build of given '
                             'board. Must be a stable build, otherwise AU test '
                             'will fail. (ex: gandof-release/R54-8743.25.0)')
    parser.add_argument('-si', '--shard_build', dest='shard_build',
                        default=None,
                        help='Default is the latest stable build of given '
                             'board. Must be a stable build, otherwise AU test '
                             'will fail.')
    parser.add_argument('-ab', '--android_board', dest='android_board',
                        default='shamu-2', help='Android board to test.')
    parser.add_argument('-ai', '--android_build', dest='android_build',
                        help='Android build to test.')
    parser.add_argument('-p', '--pool', dest='pool', default='bvt')
    parser.add_argument('-u', '--num', dest='num', type=int, default=3,
                        help='Run on at most NUM machines.')
    parser.add_argument('-e', '--email', dest='email', default=DEFAULT_EMAIL,
                        help='Email address for the notification to be sent to '
                             'after the script finished running.')
    parser.add_argument('-t', '--timeout_min', dest='timeout_min', type=int,
                        default=DEFAULT_TIMEOUT_MIN_FOR_SUITE_JOB,
                        help='Time in mins to wait before abort the jobs we '
                             'are waiting on. Only for the asynchronous suites '
                             'triggered by create_and_return flag.')
    parser.add_argument('-ud', '--num_duts', dest='num_duts',
                        default=DEFAULT_NUM_DUTS,
                        help="String of dict that indicates the required number"
                             " of DUTs for each board. E.g {'gandof':4}")
    parser.add_argument('-c', '--continue_on_failure', action='store_true',
                        dest='continue_on_failure',
                        help='All tests continue to run when there is failure')

    arguments = parser.parse_args(sys.argv[1:])

    # Validate num_duts first: it is cheap, and a bad value should be reported
    # before any stable-version lookup is attempted.
    try:
        num_duts = ast.literal_eval(arguments.num_duts)
    except (ValueError, SyntaxError) as e:
        parser.error('--num_duts is not a valid dict string: %s' % e)
    if not isinstance(num_duts, dict):
        parser.error("--num_duts must be a dict, e.g. {'gandof': 4}")
    arguments.num_duts = num_duts

    # Get latest stable build as default build.
    if not arguments.build:
        arguments.build = get_default_build(arguments.board)
    if not arguments.shard_build:
        arguments.shard_build = get_default_build(arguments.shard_board)

    return arguments
266
267
def do_run_suite(suite_name, arguments, use_shard=False,
                 create_and_return=False, testbed_test=False):
    """Call run_suite to run a suite job, and return the suite job id.

    The script waits the suite job to finish before returning the suite job id.
    Also it will echo the run_suite output to stdout.

    @param suite_name: Name of a suite, e.g., dummy.
    @param arguments: Arguments for run_suite command.
    @param use_shard: If true, suite is scheduled for shard board.
    @param create_and_return: If True, run_suite just creates the suite, print
                              the job id, then finish immediately.
    @param testbed_test: True to run testbed test. Default is False.

    @return: Suite job ID.
    @raise TestPushException: if the suite job id cannot be found in the
            run_suite output, or an asynchronous suite times out.

    """
    if use_shard and not testbed_test:
        board = arguments.shard_board
        build = arguments.shard_build
    elif testbed_test:
        board = arguments.android_board
        build = arguments.android_build
    else:
        board = arguments.board
        build = arguments.build

    # Remove cros-version label to force provision.
    hosts = AFE.get_hosts(label=constants.Labels.BOARD_PREFIX+board,
                          locked=False)
    for host in hosts:
        labels_to_remove = [
                l for l in host.labels
                if (l.startswith(provision.CROS_VERSION_PREFIX) or
                    l.startswith(provision.TESTBED_BUILD_VERSION_PREFIX))]
        if labels_to_remove:
            AFE.run('host_remove_labels', id=host.id, labels=labels_to_remove)

        # Test repair work flow on shards, powerwash test will timeout after 7m.
        if use_shard and not create_and_return:
            powerwash_dut_to_test_repair(host.hostname, timeout=420)

    current_dir = os.path.dirname(os.path.realpath(__file__))
    cmd = [os.path.join(current_dir, RUN_SUITE_COMMAND),
           '-s', suite_name,
           '-b', board,
           '-i', build,
           '-p', arguments.pool,
           '-u', str(arguments.num)]
    if create_and_return:
        cmd += ['-c']
    if testbed_test:
        cmd += ['--run_prod_code']

    suite_job_id = None

    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                            stderr=subprocess.STDOUT)

    while True:
        line = proc.stdout.readline()
        # An empty read means EOF on the pipe. Stop here instead of spinning
        # on empty lines until the process exits.
        if not line:
            break
        print(line.rstrip())
        run_suite_output.append(line.rstrip())

        if not suite_job_id:
            m = re.match(SUITE_JOB_START_INFO_REGEX, line)
            if m and m.group(1):
                suite_job_id = int(m.group(1))
                all_suite_ids.append(suite_job_id)

    # Release the pipe and reap run_suite before going on.
    proc.stdout.close()
    proc.wait()

    if not suite_job_id:
        raise TestPushException('Failed to retrieve suite job ID.')

    # If create_and_return specified, wait for the suite to finish.
    if create_and_return:
        end = time.time() + arguments.timeout_min * 60
        while not AFE.get_jobs(id=suite_job_id, finished=True):
            if time.time() < end:
                time.sleep(10)
            else:
                AFE.run('abort_host_queue_entries', job=suite_job_id)
                raise TestPushException(
                        'Asynchronous suite triggered by create_and_return '
                        'flag has timed out after %d mins. Aborting it.' %
                        arguments.timeout_min)

    print('Suite job %s is completed.' % suite_job_id)
    return suite_job_id
360
361
def check_dut_image(build, suite_job_id):
    """Confirm all DUTs used for the suite are imaged to expected build.

    The hosts are taken from the first host queue entry of every child job of
    the suite job.

    @param build: Expected build to be imaged.
    @param suite_job_id: job ID of the suite job.
    @raise TestPushException: If a DUT does not have expected build imaged.
    """
    print('Checking image installed in DUTs...')
    child_job_ids = []
    for job in models.Job.objects.filter(parent_job_id=suite_job_id):
        child_job_ids.append(job.id)

    hostnames = set()
    for child_job_id in child_job_ids:
        first_hqe = models.HostQueueEntry.objects.filter(
                job_id=child_job_id)[0]
        hostnames.add(first_hqe.host.hostname)

    for hostname in hostnames:
        found_build = site_utils.get_build_from_afe(hostname, AFE)
        if found_build == build:
            continue
        raise TestPushException('DUT is not imaged properly. Host %s has '
                                'build %s, while build %s is expected.' %
                                (hostname, found_build, build))
381
382
def test_suite(suite_name, expected_results, arguments, use_shard=False,
               create_and_return=False, testbed_test=False):
    """Call run_suite to start a suite job and verify results.

    @param suite_name: Name of a suite, e.g., dummy
    @param expected_results: A dictionary of test name to test result.
    @param arguments: Arguments for run_suite command.
    @param use_shard: If true, suite is scheduled for shard board.
    @param create_and_return: If True, run_suite just creates the suite, print
                              the job id, then finish immediately.
    @param testbed_test: True to run testbed test. Default is False.
    @raise TestPushException: If the suite fails to run, a DUT has the wrong
            build, or the test results differ from expected_results.
    """
    suite_job_id = do_run_suite(suite_name, arguments, use_shard,
                                create_and_return, testbed_test)

    # Confirm all DUTs used for the suite are imaged to expected build.
    # hqe.host_id for jobs running in shard is not synced back to master db,
    # therefore, skip verifying dut build for jobs running in shard.
    # The check is also skipped for the AU suite and for testbed tests, so the
    # expected build is always arguments.build here.
    if suite_name != AU_SUITE and not use_shard and not testbed_test:
        check_dut_image(arguments.build, suite_job_id)

    # Verify test results are the expected results.
    verify_test_results(suite_job_id, expected_results)
408
409
def verify_test_results(job_id, expected_results):
    """Verify the test results with the expected results.

    Three kinds of problems are collected: tests whose status differs from the
    expected one, tests that ran but match no expected pattern, and expected
    patterns that matched no test (except those in IGNORE_MISSING_TESTS). The
    link to the job's log page must also be loadable.

    @param job_id: id of the running jobs. For suite job, it is suite_job_id.
    @param expected_results: A dictionary of test name to test result.
    @raise TestPushException: If verify fails.
    """
    print('Comparing test results...')
    test_views = site_utils.get_test_views_from_tko(job_id, TKO)

    mismatch_errors = []
    extra_test_errors = []

    found_keys = set()
    for test_name, test_status in test_views.items():
        print('%s%s' % (test_name.ljust(30), test_status))
        # platform_InstallTestImage test may exist in old builds.
        if re.search('platform_InstallTestImage_SERVER_JOB$', test_name):
            continue
        test_found = False
        for key, val in expected_results.items():
            if re.search(key, test_name):
                test_found = True
                found_keys.add(key)
                if val != test_status:
                    error = ('%s Expected: [%s], Actual: [%s]' %
                             (test_name, val, test_status))
                    mismatch_errors.append(error)
        if not test_found:
            extra_test_errors.append(test_name)

    # Expected patterns that matched nothing, minus the known exceptions.
    # Sorted so that the report is stable from run to run.
    missing_test_errors = sorted(
            set(expected_results) - found_keys - set(IGNORE_MISSING_TESTS))

    summary = []
    if mismatch_errors:
        summary.append(('Results of %d test(s) do not match expected '
                        'values:') % len(mismatch_errors))
        summary.extend(mismatch_errors)
        summary.append('\n')

    if extra_test_errors:
        summary.append('%d test(s) are not expected to be run:' %
                       len(extra_test_errors))
        summary.extend(extra_test_errors)
        summary.append('\n')

    if missing_test_errors:
        summary.append('%d test(s) are missing from the results:' %
                       len(missing_test_errors))
        summary.extend(missing_test_errors)
        summary.append('\n')

    # Test link to log can be loaded.
    job_name = '%s-%s' % (job_id, getpass.getuser())
    log_link = URL_PATTERN % (URL_HOST, job_name)
    try:
        response = urllib2.urlopen(log_link)
        try:
            response.read()
        finally:
            # Always release the connection, even if the read fails.
            response.close()
    except urllib2.URLError:
        summary.append('Failed to load page for link to log: %s.' % log_link)

    if summary:
        raise TestPushException('\n'.join(summary))
477
478
def test_suite_wrapper(queue, suite_name, expected_results, arguments,
                       use_shard=False, create_and_return=False,
                       testbed_test=False):
    """Run test_suite and forward any exception to the parent process.

    Anything raised by test_suite is put on the queue as a tuple of
    (exception type, exception value, extracted traceback entries) instead of
    propagating.

    @param queue: Queue to save exception to be accessed by parent process.
    @param suite_name: Name of a suite, e.g., dummy
    @param expected_results: A dictionary of test name to test result.
    @param arguments: Arguments for run_suite command.
    @param use_shard: If true, suite is scheduled for shard board.
    @param create_and_return: If True, run_suite just creates the suite, print
                              the job id, then finish immediately.
    @param testbed_test: True to run testbed test. Default is False.
    """
    try:
        test_suite(suite_name, expected_results, arguments, use_shard,
                   create_and_return, testbed_test)
    except:
        # Deliberately catch everything: whatever stops the suite must reach
        # the parent. A traceback object cannot be pickled, so only its
        # extracted entries are sent along.
        failure = sys.exc_info()
        queue.put((failure[0], failure[1], traceback.extract_tb(failure[2])))
501
502
def check_queue(queue):
    """Check the queue for any exception being raised.

    Each queue item is expected to be a tuple of (exception type, exception
    value, extracted traceback entries), as stored by test_suite_wrapper.

    @param queue: Queue used to store exception for parent process to access.
    @raise: Any exception found in the queue.
    """
    if queue.empty():
        return
    exc_info = queue.get()
    # The original traceback object cannot cross the process boundary, so
    # print its extracted entries in the usual readable form, then raise a
    # new exception of the same type.
    print('Original stack trace of the exception:\n%s' %
          ''.join(traceback.format_list(exc_info[2])))
    raise exc_info[0](exc_info[1])
515
516
def get_head_of_repos(repos):
    """Get HEAD of updated repos, currently are autotest and chromite repos

    `git rev-parse HEAD` is run with each repo path (after `~` expansion) as
    its working directory, so the working directory of this process is never
    changed.

    @param repos: a map of repo name to the path of the repo. E.g.
                  {'autotest': '/usr/local/autotest'}
    @return: a map of repo names to the current HEAD of that repo.
    """
    updated_repo_heads = {}
    for repo_name, path_to_repo in repos.items():
        head = subprocess.check_output(
                ['git', 'rev-parse', 'HEAD'],
                cwd=os.path.expanduser(path_to_repo)).strip()
        updated_repo_heads[repo_name] = head
    return updated_repo_heads
544
545
def push_prod_next_branch(updated_repo_heads):
    """push prod-next branch to the tested HEAD after all tests pass.

    The push command must be ran as PUSH_USER, since only PUSH_USER has the
    right to push branches.

    @param updated_repo_heads: a map of repo names to tested HEAD of that repo.
    @raise subprocess.CalledProcessError: if any step of the push command
            fails for a repo.
    """
    for repo_name, test_hash in updated_repo_heads.items():
        # prod-next branch for every repo is downloaded under PUSH_USER home
        # dir. The steps are chained with '&&' so that nothing is pushed, and
        # the command fails, when the cd or the rebase does not succeed.
        cmd = ('cd ~/%s && git rebase %s prod-next && '
               'git push origin prod-next' % (repo_name, test_hash))
        push_cmd = "sudo su - %s -c '%s'" % (PUSH_USER, cmd)
        print('Pushing %s prod-next branch to %s' % (repo_name, test_hash))
        print(subprocess.check_output(push_cmd, stderr=subprocess.STDOUT,
                                      shell=True))
563
564
def main():
    """Entry point for test_push script.

    Runs the push_to_prod, AU, dummy (asynchronous) and testbed suites in
    separate processes and waits for all of them. On failure, running suite
    jobs are aborted (unless continue_on_failure is set), a failure email is
    sent and the exception is re-raised. On success, a success email is sent.
    All DUTs of the pool are reverified in both cases.
    """
    arguments = parse_arguments()
    updated_repo_heads = get_head_of_repos(UPDATED_REPOS)
    updated_repo_msg = '\n'.join(
            ['%s: %s' % (name, head)
             for name, head in updated_repo_heads.items()])

    try:
        # Use daemon flag will kill child processes when parent process fails.
        use_daemon = not arguments.continue_on_failure
        check_dut_inventory(arguments.num_duts)
        queue = multiprocessing.Queue()

        # One entry per suite, in start order: suite name, expected results,
        # extra positional args for test_suite_wrapper (use_shard,
        # create_and_return, testbed_test), daemon flag.
        suite_specs = [
            (PUSH_TO_PROD_SUITE, EXPECTED_TEST_RESULTS, (), use_daemon),
            # TODO(dshi): Remove following line after crbug.com/267644 is
            # fixed. Also, merge EXPECTED_TEST_RESULTS_AU to
            # EXPECTED_TEST_RESULTS.
            # AU suite will be on shard until crbug.com/634049 is fixed.
            (AU_SUITE, EXPECTED_TEST_RESULTS_AU, (True,), use_daemon),
            # suite test with --create_and_return flag. This one is always a
            # daemon process, whatever continue_on_failure says.
            (DUMMY_SUITE, EXPECTED_TEST_RESULTS_DUMMY, (False, True), True),
            # Test suite for testbed
            (TESTBED_SUITE, EXPECTED_TEST_RESULTS_TESTBED,
             (False, False, True), use_daemon),
        ]

        suite_processes = []
        for suite_name, expected, extra_args, daemon in suite_specs:
            suite_process = multiprocessing.Process(
                    target=test_suite_wrapper,
                    args=(queue, suite_name, expected, arguments) + extra_args)
            suite_process.daemon = daemon
            suite_process.start()
            suite_processes.append(suite_process)

        # Surface a child's exception as soon as it shows up in the queue.
        while any(p.is_alive() for p in suite_processes):
            check_queue(queue)
            time.sleep(5)

        check_queue(queue)

        for suite_process in suite_processes:
            suite_process.join()

        # All tests pass, push prod-next branch for UPDATED_REPOS.
        #push_prod_next_branch(updated_repo_heads)
    except Exception as e:
        print('Test for pushing to prod failed:\n')
        print(str(e))
        # Abort running jobs when choose not to continue when there is failure.
        if not arguments.continue_on_failure:
            for suite_id in all_suite_ids:
                if AFE.get_jobs(id=suite_id, finished=False):
                    AFE.run('abort_host_queue_entries', job=suite_id)
        # Send out email about the test failure.
        if arguments.email:
            failure_report = (
                    'Test CLs of the following repos failed. Below are the '
                    'repos and the corresponding test HEAD.\n\n%s\n\n.'
                    'Error occurred during test:\n\n%s\n\n' %
                    (updated_repo_msg, str(e)))
            failure_report += '\n'.join(run_suite_output)
            gmail_lib.send_email(
                    arguments.email,
                    'Test for pushing to prod failed. Do NOT push!',
                    failure_report)
        raise
    finally:
        # Reverify all the hosts
        reverify_all_push_duts(arguments.pool)

    message = ('\nAll tests are completed successfully, the prod branch of the '
               'following repos ready to be pushed to the hash list below.\n'
               '%s' % updated_repo_msg)
    print(message)
    # Send out email about test completed successfully.
    if arguments.email:
        gmail_lib.send_email(
                arguments.email,
                'Test for pushing to prod completed successfully',
                message)


if __name__ == '__main__':
    sys.exit(main())