blob: f465e1cecbb75b3112d2fb2362ec8ac8e5508d7a [file] [log] [blame]
Dan Shi7e04fa82013-07-25 15:08:48 -07001#!/usr/bin/python
2#
3# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
4# Use of this source code is governed by a BSD-style license that can be
5# found in the LICENSE file.
6
7"""Tool to validate code in prod branch before pushing to lab.
8
9The script runs push_to_prod suite to verify code in prod branch is ready to be
10pushed. Link to design document:
11https://docs.google.com/a/google.com/document/d/1JMz0xS3fZRSHMpFkkKAL_rxsdbNZomhHbC3B8L71uuI/edit
12
13To verify if prod branch can be pushed to lab, run following command in
14chromeos-autotest.cbf server:
Michael Liang52d9f1f2014-06-17 15:01:24 -070015/usr/local/autotest/site_utils/test_push.py -e someone@company.com
Dan Shi7e04fa82013-07-25 15:08:48 -070016
Shuqian Zhaof3a114c2016-09-21 11:02:15 -070017The script uses latest gandof stable build as test build by default.
Dan Shi7e04fa82013-07-25 15:08:48 -070018
19"""
20
21import argparse
Shuqian Zhao1f311c02016-09-01 19:30:54 -070022import ast
Shuqian Zhao7b2daea2016-10-25 13:31:06 -070023from contextlib import contextmanager
Dan Shi7e04fa82013-07-25 15:08:48 -070024import getpass
Dan Shief1a5c02015-04-07 17:37:09 -070025import multiprocessing
Dan Shi7e04fa82013-07-25 15:08:48 -070026import os
27import re
28import subprocess
29import sys
Dan Shief1a5c02015-04-07 17:37:09 -070030import time
31import traceback
Dan Shi7e04fa82013-07-25 15:08:48 -070032import urllib2
33
34import common
Dan Shia8da7602014-05-09 15:18:15 -070035try:
36 from autotest_lib.frontend import setup_django_environment
37 from autotest_lib.frontend.afe import models
Shuqian Zhao327b6952016-09-12 10:42:03 -070038 from autotest_lib.frontend.afe import rpc_utils
Dan Shia8da7602014-05-09 15:18:15 -070039except ImportError:
40 # Unittest may not have Django database configured and will fail to import.
41 pass
Dan Shi5fa602c2015-03-26 17:54:13 -070042from autotest_lib.client.common_lib import global_config
Shuqian Zhao327b6952016-09-12 10:42:03 -070043from autotest_lib.client.common_lib import priorities
Shuqian Zhao6fc7bf42016-12-11 19:10:36 -080044from autotest_lib.client.common_lib.cros import retry
Dan Shi7e04fa82013-07-25 15:08:48 -070045from autotest_lib.server import site_utils
Shuqian Zhao327b6952016-09-12 10:42:03 -070046from autotest_lib.server import utils
Dan Shi47d32882014-12-22 16:25:05 -080047from autotest_lib.server.cros import provision
Dan Shi7e04fa82013-07-25 15:08:48 -070048from autotest_lib.server.cros.dynamic_suite import frontend_wrappers
Dan Shi5fa602c2015-03-26 17:54:13 -070049from autotest_lib.site_utils import gmail_lib
Dan Shi47d32882014-12-22 16:25:05 -080050from autotest_lib.site_utils.suite_scheduler import constants
Dan Shi7e04fa82013-07-25 15:08:48 -070051
Shuqian Zhao56969542017-05-30 12:56:57 -070052try:
53 from chromite.lib import metrics
54 from chromite.lib import ts_mon_config
55except ImportError:
56 metrics = site_utils.metrics_mock
57 ts_mon_config = site_utils.metrics_mock
58
# Root of the autotest checkout and the shared global configuration object.
AUTOTEST_DIR=common.autotest_dir
CONFIG = global_config.global_config

# Module-wide AFE/TKO frontend clients shared by all helpers below.
AFE = frontend_wrappers.RetryingAFE(timeout_min=0.5, delay_sec=2)
TKO = frontend_wrappers.RetryingTKO(timeout_min=0.1, delay_sec=10)

MAIL_FROM = 'chromeos-test@google.com'
# Matches a build version such as R36-5881.0.0.
BUILD_REGEX = 'R[\d]+-[\d]+\.[\d]+\.[\d]+'
# Script name, resolved relative to this file's directory by do_run_suite.
RUN_SUITE_COMMAND = 'run_suite.py'
PUSH_TO_PROD_SUITE = 'push_to_prod'
DUMMY_SUITE = 'dummy'
# TODO(shuqianz): Dynamically get android build after crbug.com/646068 fixed
DEFAULT_TIMEOUT_MIN_FOR_SUITE_JOB = 30
IMAGE_BUCKET = CONFIG.get_config_value('CROS', 'image_storage_server')
DEFAULT_EMAIL = CONFIG.get_config_value(
        'SCHEDULER', 'notify_email', type=list, default=[])
# TODO(crbug.com/743077): Bump up testbed requirement back to 1 when we
# re-enable testbed tests.
# String form of a dict; parse_arguments converts it with ast.literal_eval.
DEFAULT_NUM_DUTS = "{'gandof': 4, 'quawks': 2, 'testbed': 0}"

# Matched against each line of run_suite output; group 1 is the suite job id.
SUITE_JOB_START_INFO_REGEX = ('^.*Created suite job:.*'
                              'tab_id=view_job&object_id=(\d+)$')

# Dictionary of test results keyed by test name regular expression.
EXPECTED_TEST_RESULTS = {'^SERVER_JOB$': 'GOOD',
                         # This is related to dummy_Fail/control.dependency.
                         'dummy_Fail.dependency$': 'TEST_NA',
                         'login_LoginSuccess.*': 'GOOD',
                         'provision_AutoUpdate.double': 'GOOD',
                         'dummy_Pass.*': 'GOOD',
                         'dummy_Fail.Fail$': 'FAIL',
                         'dummy_Fail.RetryFail$': 'FAIL',
                         'dummy_Fail.RetrySuccess': 'GOOD',
                         'dummy_Fail.Error$': 'ERROR',
                         'dummy_Fail.Warn$': 'WARN',
                         'dummy_Fail.NAError$': 'TEST_NA',
                         'dummy_Fail.Crash$': 'GOOD',
                         }

# Expected results of the dummy suite.
EXPECTED_TEST_RESULTS_DUMMY = {'^SERVER_JOB$': 'GOOD',
                               'dummy_Pass.*': 'GOOD',
                               'dummy_Fail.Fail': 'FAIL',
                               'dummy_Fail.Warn': 'WARN',
                               'dummy_Fail.Crash': 'GOOD',
                               'dummy_Fail.Error': 'ERROR',
                               'dummy_Fail.NAError': 'TEST_NA',}

# Expected results of the testbed test.
EXPECTED_TEST_RESULTS_TESTBED = {'^SERVER_JOB$': 'GOOD',
                                 'testbed_DummyTest': 'GOOD',}

# Expected results of the job created by powerwash_dut_to_test_repair.
EXPECTED_TEST_RESULTS_POWERWASH = {'platform_Powerwash': 'GOOD',
                                   'SERVER_JOB': 'GOOD'}

# The log link checked by verify_test_results is URL_PATTERN % (URL_HOST, job).
URL_HOST = CONFIG.get_config_value('SERVER', 'hostname', type=str)
URL_PATTERN = CONFIG.get_config_value('CROS', 'log_url_pattern', type=str)

# Some tests could be missing from the test results for various reasons. Add
# such tests to this list and explain the reason.
# NOTE: verify_test_results removes these entries from the set of missing
# expected-result keys by exact string comparison, so an entry only has an
# effect when it is spelled exactly like a key of the expected-results dict.
IGNORE_MISSING_TESTS = [
    # For latest build, npo_test_delta does not exist.
    'autoupdate_EndToEndTest.npo_test_delta.*',
    # For trybot build, nmo_test_delta does not exist.
    'autoupdate_EndToEndTest.nmo_test_delta.*',
    # Older build does not have login_LoginSuccess test in push_to_prod suite.
    # TODO(dshi): Remove following lines after R41 is stable.
    'login_LoginSuccess']

# Save all run_suite command output.
# Manager-backed lists, so appends made in the suite child processes are
# visible to the parent process.
manager = multiprocessing.Manager()
run_suite_output = manager.list()
all_suite_ids = manager.list()
# A dict maps the name of the updated repos and the path of them.
UPDATED_REPOS = {'autotest': AUTOTEST_DIR,
                 'chromite': '%s/site-packages/chromite/' % AUTOTEST_DIR}
# Account that push_prod_next_branch switches to (sudo su) to push branches.
PUSH_USER = 'chromeos-test-lab'
Dan Shi7e04fa82013-07-25 15:08:48 -0700134
class TestPushException(Exception):
    """Raised when validating the prod branch (the test push) fails."""
138
Dan Shi5ba5d2e2014-05-09 13:47:00 -0700139
@retry.retry(TestPushException, timeout_min=5, delay_sec=30)
def check_dut_inventory(required_num_duts, pool):
    """Check DUT inventory for each board in the pool specified.

    Only hosts that are Ready, unlocked and carry the pool label are counted.
    The function is wrapped in retry.retry for TestPushException, so a
    shortage is presumably re-checked (30s delay, 5 min budget per the
    decorator arguments) before the exception reaches the caller.

    @param required_num_duts: a dict specifying the number of DUT each platform
                              requires in order to finish push tests.
    @param pool: the pool used by test_push.
    @raise TestPushException: if number of DUTs are less than the requirement.
    """
    print('Checking DUT inventory...')
    pool_label = constants.Labels.POOL_PREFIX + pool
    hosts = AFE.run('get_hosts', status='Ready', locked=False)

    # Count the ready hosts of the pool per platform in a single pass.
    current_inventory = {}
    for host in hosts:
        if pool_label not in host.get('labels', []):
            continue
        platform = host['platform']
        current_inventory[platform] = current_inventory.get(platform, 0) + 1

    shortages = []
    for platform, req_num in required_num_duts.items():
        curr_num = current_inventory.get(platform, 0)
        if curr_num < req_num:
            shortages.append(
                    '\nRequire %d %s DUTs in pool: %s, only %d are Ready'
                    ' now' % (req_num, platform, pool, curr_num))
    if shortages:
        raise TestPushException('Not enough DUTs to run push tests. %s' %
                                ''.join(shortages))
164
165
def powerwash_dut_to_test_repair(hostname, timeout):
    """Run platform_Powerwash on a DUT, then reverify it to exercise repair.

    A super-priority server job is created from the platform_Powerwash control
    file. TKO is polled every 10 seconds until the job reports test statuses;
    the results are then compared with EXPECTED_TEST_RESULTS_POWERWASH.

    @param hostname: hostname of the dut.
    @param timeout: seconds to wait for the powerwash job to report results
                    before it is aborted.
    @raise TestPushException: if the job times out, or if verify_test_results
                              rejects its results.
    """
    powerwash_test = models.Test.objects.get(name='platform_Powerwash')
    control_path = os.path.join(common.autotest_dir, powerwash_test.path)
    control_file = utils.read_file(control_path)
    job_id = rpc_utils.create_job_common(
            'powerwash', priority=priorities.Priority.SUPER,
            control_type='Server', control_file=control_file,
            hosts=[hostname])

    deadline = time.time() + timeout
    while True:
        if TKO.get_job_test_statuses_from_db(job_id):
            break
        if time.time() >= deadline:
            AFE.run('abort_host_queue_entries', job=job_id)
            raise TestPushException(
                    'Powerwash test on %s timeout after %ds, abort it.' %
                    (hostname, timeout))
        time.sleep(10)

    verify_test_results(job_id, EXPECTED_TEST_RESULTS_POWERWASH)
    # Kick off verify, verify will fail and a repair should be triggered.
    AFE.reverify_hosts(hostnames=[hostname])
Kevin Cheng6e4c2642015-12-11 09:45:57 -0800190
191
def reverify_all_push_duts():
    """Schedule a reverify for every host returned by AFE.get_hosts()."""
    print('Reverifying all DUTs.')
    hostnames = []
    for host in AFE.get_hosts():
        hostnames.append(host.hostname)
    AFE.reverify_hosts(hostnames=hostnames)
197
198
def get_default_build(board='gandof', server='chromeos-autotest.hot'):
    """Get the default build to be used for test.

    The stable version is looked up with `atest stable_version list` against
    the given server. If atest cannot be run, exits with an error, or prints
    nothing that looks like a build version, the value of
    CROS/stable_cros_version from the global config is used instead.

    @param board: Name of board to be tested, default is gandof.
    @param server: Web server handed to atest through -w to grab the stable
                   version from.
    @return: Build to be tested, e.g., gandof-release/R36-5881.0.0
    """
    # An argument list (no shell) keeps board/server from being interpreted by
    # a shell.
    cmd = [os.path.join(AUTOTEST_DIR, 'cli', 'atest'),
           'stable_version', 'list', '--board=%s' % board, '-w', server]
    try:
        result = subprocess.check_output(cmd).strip()
    except (OSError, subprocess.CalledProcessError) as e:
        print('Failed to get stable version of %s from %s: %s' %
              (board, server, e))
    else:
        match = re.search(BUILD_REGEX, result)
        if match:
            return '%s-release/%s' % (board, match.group(0))

    # If fail to get stable version from cautotest, use that defined in config
    build = CONFIG.get_config_value('CROS', 'stable_cros_version')
    return '%s-release/%s' % (board, build)
216
def parse_arguments():
    """Parse arguments for test_push tool.

    Besides plain parsing, this fills in the build and shard build with the
    stable build of the respective board when they are not given, and turns
    the --num_duts string into a dict.

    @return: Parsed arguments. `num_duts` is a dict of board name to required
             number of DUTs; `email` is a list of addresses.

    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-b', '--board', dest='board', default='gandof',
                        help='Default is gandof.')
    parser.add_argument('-sb', '--shard_board', dest='shard_board',
                        default='quawks',
                        help='Default is quawks.')
    parser.add_argument('-i', '--build', dest='build', default=None,
                        help='Default is the latest stable build of given '
                             'board. Must be a stable build, otherwise AU test '
                             'will fail. (ex: gandof-release/R54-8743.25.0)')
    parser.add_argument('-si', '--shard_build', dest='shard_build',
                        default=None,
                        help='Default is the latest stable build of given '
                             'board. Must be a stable build, otherwise AU test '
                             'will fail.')
    parser.add_argument('-w', '--web', default='chromeos-autotest.hot',
                        help='Specify web server to grab stable version from.')
    parser.add_argument('-ab', '--android_board', dest='android_board',
                        default='shamu-2', help='Android board to test.')
    parser.add_argument('-ai', '--android_build', dest='android_build',
                        help='Android build to test.')
    parser.add_argument('-p', '--pool', dest='pool', default='bvt',
                        help='Pool of DUTs to run the tests in. Default is '
                             'bvt.')
    parser.add_argument('-u', '--num', dest='num', type=int, default=3,
                        help='Run on at most NUM machines.')
    parser.add_argument('-e', '--email', nargs='+', dest='email',
                        default=DEFAULT_EMAIL,
                        help='Email address for the notification to be sent to '
                             'after the script finished running.')
    parser.add_argument('-t', '--timeout_min', dest='timeout_min', type=int,
                        default=DEFAULT_TIMEOUT_MIN_FOR_SUITE_JOB,
                        help='Time in mins to wait before abort the jobs we '
                             'are waiting on. Only for the asynchronous suites '
                             'triggered by create_and_return flag.')
    parser.add_argument('-ud', '--num_duts', dest='num_duts',
                        default=DEFAULT_NUM_DUTS,
                        help="String of dict that indicates the required number"
                             " of DUTs for each board. E.g {'gandof':4}")
    parser.add_argument('-c', '--continue_on_failure', action='store_true',
                        dest='continue_on_failure',
                        help='All tests continue to run when there is failure')

    arguments = parser.parse_args(sys.argv[1:])

    # Validate --num_duts before doing any slower work; later code calls
    # .items() on it, so anything but a dict is a usage error.
    try:
        num_duts = ast.literal_eval(arguments.num_duts)
    except (ValueError, SyntaxError) as e:
        parser.error('Invalid --num_duts value %r: %s' %
                     (arguments.num_duts, e))
    if not isinstance(num_duts, dict):
        parser.error('--num_duts must be the string of a dict, got %r' %
                     arguments.num_duts)
    arguments.num_duts = num_duts

    # Get latest stable build as default build.
    if not arguments.build:
        arguments.build = get_default_build(arguments.board, arguments.web)
    if not arguments.shard_build:
        arguments.shard_build = get_default_build(arguments.shard_board,
                                                  arguments.web)

    return arguments
275
276
def do_run_suite(suite_name, arguments, use_shard=False,
                 create_and_return=False, testbed_test=False):
    """Call run_suite to run a suite job, and return the suite job id.

    The script waits the suite job to finish before returning the suite job id.
    Also it will echo the run_suite output to stdout and record it in
    run_suite_output; the suite job id is recorded in all_suite_ids.

    Before the suite is started, the version labels of all unlocked hosts of
    the board are removed to force provision. For a synchronous shard suite
    (use_shard and not create_and_return) one of those hosts is additionally
    powerwashed to test the repair workflow.

    @param suite_name: Name of a suite, e.g., dummy.
    @param arguments: Arguments for run_suite command.
    @param use_shard: If true, suite is scheduled for shard board.
    @param create_and_return: If True, run_suite just creates the suite, print
                              the job id, then finish immediately.
    @param testbed_test: True to run testbed test. Default is False.

    @return: Suite job ID.
    @raise TestPushException: if no host is available for the powerwash test,
                              if the suite job id cannot be found in the
                              run_suite output, or if an asynchronous suite
                              does not finish within arguments.timeout_min.

    """
    if testbed_test:
        board = arguments.android_board
        build = arguments.android_build
    elif use_shard:
        board = arguments.shard_board
        build = arguments.shard_build
    else:
        board = arguments.board
        build = arguments.build

    # Remove cros-version label to force provision.
    hosts = AFE.get_hosts(label=constants.Labels.BOARD_PREFIX+board,
                          locked=False)
    last_host = None
    for host in hosts:
        labels_to_remove = [
                l for l in host.labels
                if (l.startswith(provision.CROS_VERSION_PREFIX) or
                    l.startswith(provision.TESTBED_BUILD_VERSION_PREFIX))]
        if labels_to_remove:
            AFE.run('host_remove_labels', id=host.id, labels=labels_to_remove)
        last_host = host

    # Test repair work flow on shards, powerwash test will timeout after 7m.
    if use_shard and not create_and_return:
        # Without this check an empty host list ends in a NameError.
        if last_host is None:
            raise TestPushException(
                    'No unlocked host of board %s found to test the repair '
                    'workflow.' % board)
        powerwash_dut_to_test_repair(last_host.hostname, timeout=420)

    current_dir = os.path.dirname(os.path.realpath(__file__))
    cmd = [os.path.join(current_dir, RUN_SUITE_COMMAND),
           '-s', suite_name,
           '-b', board,
           '-i', build,
           '-p', arguments.pool,
           '-u', str(arguments.num)]
    if create_and_return:
        cmd += ['-c']
    if testbed_test:
        cmd += ['--run_prod_code']

    suite_job_id = None

    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                            stderr=subprocess.STDOUT)

    # readline() returns '' only at EOF, so the loop ends once run_suite has
    # closed its output, without spinning on empty reads in between.
    for line in iter(proc.stdout.readline, ''):
        print(line.rstrip())
        run_suite_output.append(line.rstrip())

        if not suite_job_id:
            m = re.match(SUITE_JOB_START_INFO_REGEX, line)
            if m and m.group(1):
                suite_job_id = int(m.group(1))
                all_suite_ids.append(suite_job_id)
    proc.stdout.close()
    # Reap the run_suite process.
    proc.wait()

    if not suite_job_id:
        raise TestPushException('Failed to retrieve suite job ID.')

    # If create_and_return specified, wait for the suite to finish.
    if create_and_return:
        end = time.time() + arguments.timeout_min * 60
        while not AFE.get_jobs(id=suite_job_id, finished=True):
            if time.time() >= end:
                AFE.run('abort_host_queue_entries', job=suite_job_id)
                raise TestPushException(
                        'Asynchronous suite triggered by create_and_return '
                        'flag has timed out after %d mins. Aborting it.' %
                        arguments.timeout_min)
            time.sleep(10)

    print('Suite job %s is completed.' % suite_job_id)
    return suite_job_id
369
370
def check_dut_image(build, suite_job_id):
    """Confirm all DUTs used for the suite are imaged to expected build.

    The DUTs are the hosts of the first host queue entry of every child job
    of the suite job.

    @param build: Expected build to be imaged.
    @param suite_job_id: job ID of the suite job.
    @raise TestPushException: If a DUT does not have expected build imaged.
    """
    print('Checking image installed in DUTs...')
    hostnames = set()
    for child_job in models.Job.objects.filter(parent_job_id=suite_job_id):
        first_hqe = models.HostQueueEntry.objects.filter(
                job_id=child_job.id)[0]
        hostnames.add(first_hqe.host.hostname)

    for hostname in hostnames:
        found_build = site_utils.get_build_from_afe(hostname, AFE)
        if found_build == build:
            continue
        raise TestPushException('DUT is not imaged properly. Host %s has '
                                'build %s, while build %s is expected.' %
                                (hostname, found_build, build))
Dan Shia8da7602014-05-09 15:18:15 -0700390
391
def test_suite(suite_name, expected_results, arguments, use_shard=False,
               create_and_return=False, testbed_test=False):
    """Run a suite through run_suite and verify its outcome.

    @param suite_name: Name of a suite, e.g., dummy
    @param expected_results: A dictionary of test name to test result.
    @param arguments: Arguments for run_suite command.
    @param use_shard: If true, suite is scheduled for shard board.
    @param create_and_return: If True, run_suite just creates the suite, print
                              the job id, then finish immediately.
    @param testbed_test: True to run testbed test. Default is False.
    """
    suite_job_id = do_run_suite(suite_name, arguments, use_shard=use_shard,
                                create_and_return=create_and_return,
                                testbed_test=testbed_test)

    if testbed_test:
        build_expected = arguments.android_build
    else:
        build_expected = arguments.build

    # Confirm all DUTs used for the suite are imaged to expected build.
    # hqe.host_id for jobs running in shard is not synced back to master db,
    # therefore, skip verifying dut build for jobs running in shard.
    if not (use_shard or testbed_test):
        check_dut_image(build_expected, suite_job_id)

    # Verify test results are the expected results.
    verify_test_results(suite_job_id, expected_results)
417
418
def verify_test_results(job_id, expected_results):
    """Verify the test results with the expected results.

    Three kinds of problems are collected: tests whose status differs from the
    expected one, tests that match no key of expected_results, and keys of
    expected_results that matched no test (minus IGNORE_MISSING_TESTS). It is
    also checked that the link to the job's logs can be loaded.

    @param job_id: id of the running jobs. For suite job, it is suite_job_id.
    @param expected_results: A dictionary of test name to test result. Keys
                             are regular expressions searched in test names.
    @raise TestPushException: If verify fails.
    """
    print('Comparing test results...')
    test_views = site_utils.get_test_views_from_tko(job_id, TKO)

    mismatch_errors = []
    extra_test_errors = []

    found_keys = set()
    for test_name, test_status in test_views.items():
        print("%s%s" % (test_name.ljust(30), test_status))
        # platform_InstallTestImage test may exist in old builds.
        if re.search('platform_InstallTestImage_SERVER_JOB$', test_name):
            continue
        matched_keys = [key for key in expected_results
                        if re.search(key, test_name)]
        if not matched_keys:
            extra_test_errors.append(test_name)
            continue
        found_keys.update(matched_keys)
        for key in matched_keys:
            expected_status = expected_results[key]
            if expected_status != test_status:
                error = ('%s Expected: [%s], Actual: [%s]' %
                         (test_name, expected_status, test_status))
                mismatch_errors.append(error)

    # Sorted so that the report is stable between runs.
    missing_test_errors = sorted(
            set(expected_results) - found_keys - set(IGNORE_MISSING_TESTS))

    summary = []
    if mismatch_errors:
        summary.append(('Results of %d test(s) do not match expected '
                        'values:') % len(mismatch_errors))
        summary.extend(mismatch_errors)
        summary.append('\n')

    if extra_test_errors:
        summary.append('%d test(s) are not expected to be run:' %
                       len(extra_test_errors))
        summary.extend(extra_test_errors)
        summary.append('\n')

    if missing_test_errors:
        summary.append('%d test(s) are missing from the results:' %
                       len(missing_test_errors))
        summary.extend(missing_test_errors)
        summary.append('\n')

    # Test link to log can be loaded.
    job_name = '%s-%s' % (job_id, getpass.getuser())
    log_link = URL_PATTERN % (URL_HOST, job_name)
    try:
        response = urllib2.urlopen(log_link)
        try:
            response.read()
        finally:
            # Do not leave the HTTP connection open.
            response.close()
    except urllib2.URLError:
        summary.append('Failed to load page for link to log: %s.' % log_link)

    if summary:
        raise TestPushException('\n'.join(summary))
486
487
Dan Shief1a5c02015-04-07 17:37:09 -0700488def test_suite_wrapper(queue, suite_name, expected_results, arguments,
Dan Shi81ddc422016-09-09 13:58:31 -0700489 use_shard=False, create_and_return=False,
490 testbed_test=False):
Dan Shief1a5c02015-04-07 17:37:09 -0700491 """Wrapper to call test_suite. Handle exception and pipe it to parent
492 process.
493
494 @param queue: Queue to save exception to be accessed by parent process.
495 @param suite_name: Name of a suite, e.g., dummy
496 @param expected_results: A dictionary of test name to test result.
497 @param arguments: Arguments for run_suite command.
498 @param use_shard: If true, suite is scheduled for shard board.
Shuqian Zhaod4864772015-08-06 09:46:22 -0700499 @param create_and_return: If True, run_suite just creates the suite, print
500 the job id, then finish immediately.
Dan Shi81ddc422016-09-09 13:58:31 -0700501 @param testbed_test: True to run testbed test. Default is False.
Dan Shief1a5c02015-04-07 17:37:09 -0700502 """
503 try:
Shuqian Zhaod4864772015-08-06 09:46:22 -0700504 test_suite(suite_name, expected_results, arguments, use_shard,
Dan Shi81ddc422016-09-09 13:58:31 -0700505 create_and_return, testbed_test)
Dan Shief1a5c02015-04-07 17:37:09 -0700506 except:
507 # Store the whole exc_info leads to a PicklingError.
508 except_type, except_value, tb = sys.exc_info()
509 queue.put((except_type, except_value, traceback.extract_tb(tb)))
510
511
def check_queue(queue):
    """Check the queue for any exception being raised.

    Entries are the (exception type, exception value, extracted traceback)
    tuples put by test_suite_wrapper. The traceback of the first entry is
    printed and its exception is raised in the calling process.

    @param queue: Queue used to store exception for parent process to access.
    @raise: Any exception found in the queue.
    """
    if queue.empty():
        return
    exc_type, exc_value, tb_entries = queue.get()
    # The traceback object itself is not passed through the queue, so print
    # the extracted entries in the usual traceback layout instead.
    print('Original stack trace of the exception:\n%s' %
          ''.join(traceback.format_list(tb_entries)))
    # Re-raise the original instance: building a new one with
    # exc_type(exc_value) breaks for exception classes whose constructor
    # needs more than one argument.
    if isinstance(exc_value, BaseException):
        raise exc_value
    raise exc_type(exc_value)
524
525
def get_head_of_repos(repos):
    """Look up the HEAD commit of each given repo.

    For every repo the working directory is switched to the repo path
    (with ~ expanded) while `git rev-parse HEAD` runs, and is restored
    afterwards even if the command fails.

    @param repos: a map of repo name to the path of the repo. E.g.
                  {'autotest': '/usr/local/autotest'}
    @return: a map of repo names to the current HEAD of that repo.
    """
    heads = {}
    for repo_name, path_to_repo in repos.items():
        original_wd = os.getcwd()
        os.chdir(os.path.expanduser(path_to_repo))
        try:
            output = subprocess.check_output('git rev-parse HEAD', shell=True)
            heads[repo_name] = output.strip()
        finally:
            os.chdir(original_wd)
    return heads
553
554
def push_prod_next_branch(updated_repo_heads):
    """Move the prod-next branch of each repo to the tested HEAD and push it.

    The push command must be ran as PUSH_USER, since only PUSH_USER has the
    right to push branches.

    @param updated_repo_heads: a map of repo names to tested HEAD of that repo.
    """
    # prod-next branch for every repo is downloaded under PUSH_USER home dir.
    git_steps = ('cd ~/{repo}; git pull; git rebase {hash} prod-next;'
                 'git push origin prod-next')
    # {repo} and {hash} are left in place here and filled in per repo below.
    command_template = "sudo su - %s -c '%s'" % (PUSH_USER, git_steps)

    for repo_name, test_hash in updated_repo_heads.items():
        push_cmd = command_template.format(repo=repo_name, hash=test_hash)
        print('Pushing %s prod-next branch to %s' % (repo_name, test_hash))
        push_output = subprocess.check_output(
                push_cmd, stderr=subprocess.STDOUT, shell=True)
        print(push_output)
573
574
def send_notification_email(email_list, title, msg):
    """Send one notification email addressed to everybody in email_list.

    @param email_list: a list of email addresses that receive the
            notification, e.g. [xxx@google.com, xxx@google.com, ...]. A list
            is used so that several addresses can be given with '--email' or
            '-e'.
    @param title: the title of the email to be sent.
    @param msg: the content of the email to be sent.
    """
    # gmail_lib is handed all recipients as one comma separated string.
    recipients = ','.join(email_list)
    gmail_lib.send_email(recipients, title, msg)
587
588
def _main(arguments):
    """Run the push-to-prod validation suites and report the outcome.

    Two suites are launched in child processes and polled until both have
    exited. When everything passes, the prod-next branch of each updated repo
    is pushed and a success notification is printed (and mailed if requested).
    On any exception the failure is printed, unfinished suite jobs are aborted
    unless the caller asked to continue on failure, a failure mail is sent if
    requested, and the exception is re-raised. In both cases the completion
    metric is recorded and the push DUTs are reverified.

    @param arguments: command line arguments. The attributes read here are
                      continue_on_failure, num_duts, pool and email.
    """
    updated_repo_heads = get_head_of_repos(UPDATED_REPOS)
    updated_repo_msg = '\n'.join(
            '%s: %s' % (repo, head)
            for repo, head in updated_repo_heads.iteritems())
    test_push_success = False

    try:
        # Daemonic children are terminated together with the parent, so the
        # push_to_prod suite is only daemonic when a failure should stop it.
        kill_children_on_failure = not arguments.continue_on_failure

        # Kick off verification of every DUT before testing, and give the
        # verify jobs 15 seconds to start.
        reverify_all_push_duts()
        time.sleep(15)
        check_dut_inventory(arguments.num_duts, arguments.pool)

        results = multiprocessing.Queue()

        prod_suite = multiprocessing.Process(
                target=test_suite_wrapper,
                args=(results, PUSH_TO_PROD_SUITE, EXPECTED_TEST_RESULTS,
                      arguments))
        prod_suite.daemon = kill_children_on_failure
        prod_suite.start()

        # Suite test with --create_and_return flag; always daemonic.
        async_suite = multiprocessing.Process(
                target=test_suite_wrapper,
                args=(results, DUMMY_SUITE, EXPECTED_TEST_RESULTS_DUMMY,
                      arguments, True, True))
        async_suite.daemon = True
        async_suite.start()

        suites = (prod_suite, async_suite)

        # Poll the shared queue every 5 seconds while any suite is running.
        # NOTE(review): check_queue presumably surfaces errors reported by
        # the children -- confirm against its definition.
        while any(suite.is_alive() for suite in suites):
            check_queue(results)
            time.sleep(5)

        # One last look at the queue after both children have exited.
        check_queue(results)

        for suite in suites:
            suite.join()

        # All tests pass, push prod-next branch for UPDATED_REPOS.
        push_prod_next_branch(updated_repo_heads)
        test_push_success = True
    except Exception as e:
        print('Test for pushing to prod failed:\n')
        print(str(e))

        # Abort the still-running suite jobs unless the caller chose to
        # continue when there is a failure.
        if not arguments.continue_on_failure:
            for suite_id in all_suite_ids:
                unfinished = AFE.get_jobs(id=suite_id, finished=False)
                if unfinished:
                    AFE.run('abort_host_queue_entries', job=suite_id)

        # Send out email about the test failure.
        if arguments.email:
            failure_template = (
                    'Test CLs of the following repos failed. Below are the '
                    'repos and the corresponding test HEAD.\n\n%s\n\n.'
                    'Error occurred during test:\n\n%s\n\n'
                    'All logs have been saved to '
                    '/var/log/test_push/test_push.log on push master. '
                    'Stats on recent success rate can be found at '
                    'go/test-push-stats . Detailed '
                    'debugging info can be found at go/push-to-prod')
            failure_body = failure_template % (updated_repo_msg, str(e))
            # The collected run_suite output is appended after the summary.
            failure_body += '\n'.join(run_suite_output)
            send_notification_email(
                    arguments.email,
                    'Test for pushing to prod failed. Do NOT push!',
                    failure_body)
        raise
    finally:
        completed = metrics.Counter('chromeos/autotest/test_push/completed')
        completed.increment(fields={'success': test_push_success})
        # Reverify all the hosts, whether the run passed or failed.
        reverify_all_push_duts()

    # Only reached when the try block finished without an exception.
    message = ('\nAll tests completed successfully, the prod branch of the '
               'following repos is ready to be pushed to the hash list below.\n'
               '%s\n\n\nInstructions for pushing to prod are available at '
               'https://goto.google.com/autotest-to-prod ' % updated_repo_msg)
    print(message)

    # Send out email about test completed successfully.
    if arguments.email:
        send_notification_email(
                arguments.email,
                'Test for pushing to prod completed successfully',
                message)
Dan Shi7e04fa82013-07-25 15:08:48 -0700676
677
def main():
    """Entry point.

    Parses the command line, then runs _main inside the ts_mon global state
    set up for the 'test_push' service.

    @returns: whatever _main returns.
    """
    parsed_args = parse_arguments()
    ts_mon_state = ts_mon_config.SetupTsMonGlobalState(
            service_name='test_push', indirect=True)
    with ts_mon_state:
        return _main(parsed_args)
684
if __name__ == '__main__':
    # Run the tool and hand main()'s return value to the interpreter as the
    # process exit status.
    exit_status = main()
    sys.exit(exit_status)