blob: fa1ff6582ba8ce29ec271d2df7dc51e1a442cbb1 [file] [log] [blame]
Dan Shi7e04fa82013-07-25 15:08:48 -07001#!/usr/bin/python
2#
3# Copyright (c) 2013 The Chromium OS Authors. All rights reserved.
4# Use of this source code is governed by a BSD-style license that can be
5# found in the LICENSE file.
6
7"""Tool to validate code in prod branch before pushing to lab.
8
9The script runs push_to_prod suite to verify code in prod branch is ready to be
10pushed. Link to design document:
11https://docs.google.com/a/google.com/document/d/1JMz0xS3fZRSHMpFkkKAL_rxsdbNZomhHbC3B8L71uuI/edit
12
13To verify if prod branch can be pushed to lab, run following command in
14chromeos-autotest.cbf server:
Michael Liang52d9f1f2014-06-17 15:01:24 -070015/usr/local/autotest/site_utils/test_push.py -e someone@company.com
Dan Shi7e04fa82013-07-25 15:08:48 -070016
Shuqian Zhaof3a114c2016-09-21 11:02:15 -070017The script uses latest gandof stable build as test build by default.
Dan Shi7e04fa82013-07-25 15:08:48 -070018
19"""
20
21import argparse
Shuqian Zhao1f311c02016-09-01 19:30:54 -070022import ast
Shuqian Zhao7b2daea2016-10-25 13:31:06 -070023from contextlib import contextmanager
Dan Shi7e04fa82013-07-25 15:08:48 -070024import getpass
Dan Shief1a5c02015-04-07 17:37:09 -070025import multiprocessing
Dan Shi7e04fa82013-07-25 15:08:48 -070026import os
27import re
28import subprocess
29import sys
Dan Shief1a5c02015-04-07 17:37:09 -070030import time
31import traceback
Dan Shi7e04fa82013-07-25 15:08:48 -070032import urllib2
33
34import common
Dan Shia8da7602014-05-09 15:18:15 -070035try:
36 from autotest_lib.frontend import setup_django_environment
37 from autotest_lib.frontend.afe import models
Shuqian Zhao327b6952016-09-12 10:42:03 -070038 from autotest_lib.frontend.afe import rpc_utils
Dan Shia8da7602014-05-09 15:18:15 -070039except ImportError:
40 # Unittest may not have Django database configured and will fail to import.
41 pass
Dan Shi5fa602c2015-03-26 17:54:13 -070042from autotest_lib.client.common_lib import global_config
Shuqian Zhao327b6952016-09-12 10:42:03 -070043from autotest_lib.client.common_lib import priorities
Dan Shi7e04fa82013-07-25 15:08:48 -070044from autotest_lib.server import site_utils
Shuqian Zhao327b6952016-09-12 10:42:03 -070045from autotest_lib.server import utils
Dan Shi47d32882014-12-22 16:25:05 -080046from autotest_lib.server.cros import provision
Dan Shi7e04fa82013-07-25 15:08:48 -070047from autotest_lib.server.cros.dynamic_suite import frontend_wrappers
Dan Shi5fa602c2015-03-26 17:54:13 -070048from autotest_lib.site_utils import gmail_lib
Dan Shi47d32882014-12-22 16:25:05 -080049from autotest_lib.site_utils.suite_scheduler import constants
Dan Shi7e04fa82013-07-25 15:08:48 -070050
# Root directory of the autotest checkout, as reported by the common module.
AUTOTEST_DIR = common.autotest_dir
CONFIG = global_config.global_config

# Retrying RPC clients used by the helpers in this module.
AFE = frontend_wrappers.RetryingAFE(timeout_min=0.5, delay_sec=2)
TKO = frontend_wrappers.RetryingTKO(timeout_min=0.1, delay_sec=10)

MAIL_FROM = 'chromeos-test@google.com'
# Matches the version part of a build name, e.g. R36-5881.0.0. Raw string so
# that the regex escapes are never treated as string escapes.
BUILD_REGEX = r'R[\d]+-[\d]+\.[\d]+\.[\d]+'
RUN_SUITE_COMMAND = 'run_suite.py'
PUSH_TO_PROD_SUITE = 'push_to_prod'
DUMMY_SUITE = 'dummy'
AU_SUITE = 'paygen_au_beta'
TESTBED_SUITE = 'testbed_push'
# TODO(shuqianz): Dynamically get android build after crbug.com/646068 fixed
DEFAULT_TIMEOUT_MIN_FOR_SUITE_JOB = 30
IMAGE_BUCKET = CONFIG.get_config_value('CROS', 'image_storage_server')
DEFAULT_EMAIL = CONFIG.get_config_value(
        'SCHEDULER', 'notify_email', type=str, default='')
# String form of a dict; parse_arguments turns it into a real dict with
# ast.literal_eval. Keys are board labels, values are required DUT counts.
DEFAULT_NUM_DUTS = "{'board:gandof': 4, 'board:quawks': 2}"

# Matches the run_suite output line that announces the suite job; group 1 is
# the suite job id.
SUITE_JOB_START_INFO_REGEX = (r'^.*Created suite job:.*'
                              r'tab_id=view_job&object_id=(\d+)$')

# Dictionary of test results keyed by test name regular expression.
EXPECTED_TEST_RESULTS = {'^SERVER_JOB$': 'GOOD',
                         # This is related to dummy_Fail/control.dependency.
                         'dummy_Fail.dependency$': 'TEST_NA',
                         'login_LoginSuccess.*': 'GOOD',
                         'provision_AutoUpdate.double': 'GOOD',
                         'dummy_Pass.*': 'GOOD',
                         'dummy_Fail.Fail$': 'FAIL',
                         'dummy_Fail.RetryFail$': 'FAIL',
                         'dummy_Fail.RetrySuccess': 'GOOD',
                         'dummy_Fail.Error$': 'ERROR',
                         'dummy_Fail.Warn$': 'WARN',
                         'dummy_Fail.NAError$': 'TEST_NA',
                         'dummy_Fail.Crash$': 'GOOD',
                         }

EXPECTED_TEST_RESULTS_DUMMY = {'^SERVER_JOB$': 'GOOD',
                               'dummy_Pass.*': 'GOOD',
                               'dummy_Fail.Fail': 'FAIL',
                               'dummy_Fail.Warn': 'WARN',
                               'dummy_Fail.Crash': 'GOOD',
                               'dummy_Fail.Error': 'ERROR',
                               'dummy_Fail.NAError': 'TEST_NA',}

EXPECTED_TEST_RESULTS_AU = {'SERVER_JOB$': 'GOOD',
         'autoupdate_EndToEndTest.paygen_au_beta_delta.*': 'GOOD',
         'autoupdate_EndToEndTest.paygen_au_beta_full.*': 'GOOD',
         }

EXPECTED_TEST_RESULTS_TESTBED = {'^SERVER_JOB$': 'GOOD',
                                 'testbed_DummyTest': 'GOOD',}

EXPECTED_TEST_RESULTS_POWERWASH = {'platform_Powerwash': 'GOOD',
                                   'SERVER_JOB': 'GOOD'}

# Pieces substituted into URL_PATTERN to build the link to a job's logs.
URL_HOST = CONFIG.get_config_value('SERVER', 'hostname', type=str)
URL_PATTERN = CONFIG.get_config_value('CROS', 'log_url_pattern', type=str)

# Some test could be missing from the test results for various reasons. Add
# such test in this list and explain the reason.
IGNORE_MISSING_TESTS = [
    # For latest build, npo_test_delta does not exist.
    'autoupdate_EndToEndTest.npo_test_delta.*',
    # For trybot build, nmo_test_delta does not exist.
    'autoupdate_EndToEndTest.nmo_test_delta.*',
    # Older build does not have login_LoginSuccess test in push_to_prod suite.
    # TODO(dshi): Remove following lines after R41 is stable.
    'login_LoginSuccess']

# Save all run_suite command output. Manager-backed lists are used so that
# entries appended by the suite child processes are visible to the parent.
manager = multiprocessing.Manager()
run_suite_output = manager.list()
all_suite_ids = manager.list()
# A dict maps the name of the updated repos and the path of them.
UPDATED_REPOS = {'autotest': AUTOTEST_DIR,
                 'chromite': '%s/site-packages/chromite/' % AUTOTEST_DIR}
# Account used to run the prod-next push command.
PUSH_USER = 'chromeos-test-lab'
Dan Shi7e04fa82013-07-25 15:08:48 -0700131
class TestPushException(Exception):
    """Error signalling that the test to push to prod has failed."""
135
Dan Shi5ba5d2e2014-05-09 13:47:00 -0700136
def check_dut_inventory(required_num_duts):
    """Check that enough Ready, unlocked DUTs exist for each required board.

    @param required_num_duts: a dict mapping a board label, e.g.
                              'board:gandof', to the number of DUTs that
                              board requires in order to finish push tests.
    @raise TestPushException: if number of DUTs are less than the requirement.
    """
    hosts = AFE.run('get_hosts', status='Ready', locked=False)

    # Tally hosts per board label in a single pass. Only the first board
    # label of a host is counted; hosts that carry no board label are skipped
    # instead of raising IndexError.
    current_inventory = {}
    for host in hosts:
        for label in host['labels']:
            if label.startswith('board:'):
                current_inventory[label] = current_inventory.get(label, 0) + 1
                break

    shortages = []
    for board, req_num in required_num_duts.items():
        curr_num = current_inventory.get(board, 0)
        if curr_num < req_num:
            shortages.append('\nRequire %d %s DUTs, only %d are Ready now' %
                             (req_num, board, curr_num))
    if shortages:
        raise TestPushException('Not enough DUTs to run push tests. %s' %
                                ''.join(shortages))
157
158
def powerwash_dut_to_test_repair(hostname, timeout):
    """Run platform_Powerwash on a DUT, then reverify it to exercise repair.

    @param hostname: hostname of the dut.
    @param timeout: seconds to wait for the powerwash test results before the
                    job is aborted.
    @raise TestPushException: if no result shows up before the timeout, or if
                              the results do not match the expected ones.
    """
    powerwash_test = models.Test.objects.get(name='platform_Powerwash')
    control_path = os.path.join(common.autotest_dir, powerwash_test.path)
    control_file = utils.read_file(control_path)
    job_id = rpc_utils.create_job_common(
            'powerwash', priority=priorities.Priority.SUPER,
            control_type='Server', control_file=control_file,
            hosts=[hostname])

    # Poll TKO until the job has any test status, giving up at the deadline.
    deadline = time.time() + timeout
    while True:
        if TKO.get_job_test_statuses_from_db(job_id):
            break
        if time.time() >= deadline:
            AFE.run('abort_host_queue_entries', job=job_id)
            raise TestPushException(
                    'Powerwash test on %s timeout after %ds, abort it.' %
                    (hostname, timeout))
        time.sleep(10)

    verify_test_results(job_id, EXPECTED_TEST_RESULTS_POWERWASH)
    # Kick off verify, verify will fail and a repair should be triggered.
    AFE.reverify_hosts(hostnames=[hostname])
Kevin Cheng6e4c2642015-12-11 09:45:57 -0800183
184
def reverify_all_push_duts(pool):
    """Schedule a reverify for every host carrying the given pool label.

    @param pool: Name of the pool used by test_push.
    """
    hostnames = []
    for host in AFE.get_hosts(label=constants.Labels.POOL_PREFIX + pool):
        hostnames.append(host.hostname)
    AFE.reverify_hosts(hostnames=hostnames)
193
194
def get_default_build(board='gandof'):
    """Get the default build to be used for test.

    The stable version is looked up on cautotest through the atest CLI. When
    atest cannot be started, exits with an error, or prints nothing matching
    BUILD_REGEX, the stable_cros_version value from the global config is used
    instead.

    @param board: Name of board to be tested, default is gandof.
    @return: Build to be tested, e.g., gandof-release/R36-5881.0.0
    """
    # An argv list (no shell) keeps the board name from being interpreted by
    # a shell.
    cmd = ['%s/cli/atest' % AUTOTEST_DIR, 'stable_version', 'list',
           '--board=%s' % board, '-w', 'cautotest']
    try:
        result = subprocess.check_output(cmd).strip()
    except (OSError, subprocess.CalledProcessError) as e:
        # check_output raises when atest is missing or fails; without this
        # handler the config fallback below could never be reached.
        print('Failed to get stable version of %s from cautotest: %s' %
              (board, e))
        result = ''

    match = re.search(BUILD_REGEX, result)
    if match:
        return '%s-release/%s' % (board, match.group(0))

    # If fail to get stable version from cautotest, use that defined in config
    build = CONFIG.get_config_value('CROS', 'stable_cros_version')
    return '%s-release/%s' % (board, build)
212
def parse_arguments():
    """Parse arguments for test_push tool.

    Besides plain parsing, this fills in the default build of each board when
    none is given and converts --num_duts from its string form into a dict.
    An invalid --num_duts value ends the script with an argparse usage error.

    @return: Parsed arguments.

    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-b', '--board', dest='board', default='gandof',
                        help='Default is gandof.')
    parser.add_argument('-sb', '--shard_board', dest='shard_board',
                        default='quawks',
                        help='Default is quawks.')
    parser.add_argument('-i', '--build', dest='build', default=None,
                        help='Default is the latest stable build of given '
                             'board. Must be a stable build, otherwise AU test '
                             'will fail. (ex: gandof-release/R54-8743.25.0)')
    parser.add_argument('-si', '--shard_build', dest='shard_build', default=None,
                        help='Default is the latest stable build of given '
                             'board. Must be a stable build, otherwise AU test '
                             'will fail.')
    parser.add_argument('-ab', '--android_board', dest='android_board',
                        default='shamu-2', help='Android board to test.')
    parser.add_argument('-ai', '--android_build', dest='android_build',
                        help='Android build to test.')
    parser.add_argument('-p', '--pool', dest='pool', default='bvt')
    parser.add_argument('-u', '--num', dest='num', type=int, default=3,
                        help='Run on at most NUM machines.')
    parser.add_argument('-e', '--email', dest='email', default=DEFAULT_EMAIL,
                        help='Email address for the notification to be sent to '
                             'after the script finished running.')
    parser.add_argument('-t', '--timeout_min', dest='timeout_min', type=int,
                        default=DEFAULT_TIMEOUT_MIN_FOR_SUITE_JOB,
                        help='Time in mins to wait before abort the jobs we '
                             'are waiting on. Only for the asynchronous suites '
                             'triggered by create_and_return flag.')
    # Keys must be full board labels ('board:<name>'): that is what
    # check_dut_inventory compares them against.
    parser.add_argument('-ud', '--num_duts', dest='num_duts',
                        default=DEFAULT_NUM_DUTS,
                        help="String of dict that indicates the required number"
                             " of DUTs for each board. E.g "
                             "{'board:gandof': 4}")
    parser.add_argument('-c', '--continue_on_failure', action='store_true',
                        dest='continue_on_failure',
                        help='All tests continue to run when there is failure')

    arguments = parser.parse_args(sys.argv[1:])

    # Validate --num_duts before the (slower) default build lookups so that a
    # typo is reported right away as a usage error rather than a traceback.
    try:
        num_duts = ast.literal_eval(arguments.num_duts)
    except (ValueError, SyntaxError) as e:
        parser.error('--num_duts is not a valid dict literal: %s' % e)
    if not isinstance(num_duts, dict):
        parser.error('--num_duts must be a dict, got: %r' % (num_duts,))
    arguments.num_duts = num_duts

    # Get latest stable build as default build.
    if not arguments.build:
        arguments.build = get_default_build(arguments.board)
    if not arguments.shard_build:
        arguments.shard_build = get_default_build(arguments.shard_board)

    return arguments
267
268
def do_run_suite(suite_name, arguments, use_shard=False,
                 create_and_return=False, testbed_test=False):
    """Call run_suite to run a suite job, and return the suite job id.

    The script waits the suite job to finish before returning the suite job id.
    Also it will echo the run_suite output to stdout.

    @param suite_name: Name of a suite, e.g., dummy.
    @param arguments: Arguments for run_suite command.
    @param use_shard: If true, suite is scheduled for shard board.
    @param create_and_return: If True, run_suite just creates the suite, print
                              the job id, then finish immediately.
    @param testbed_test: True to run testbed test. Default is False.

    @return: Suite job ID.
    @raise TestPushException: if the repair work flow must be tested but no
                              unlocked host of the board exists, if the suite
                              job id cannot be found in the run_suite output,
                              or if an asynchronous suite times out.

    """
    # testbed_test takes precedence over use_shard.
    if testbed_test:
        board = arguments.android_board
        build = arguments.android_build
    elif use_shard:
        board = arguments.shard_board
        build = arguments.shard_build
    else:
        board = arguments.board
        build = arguments.build

    # Remove cros-version label to force provision.
    hosts = AFE.get_hosts(label=constants.Labels.BOARD_PREFIX + board,
                          locked=False)
    version_prefixes = (provision.CROS_VERSION_PREFIX,
                        provision.TESTBED_BUILD_VERSION_PREFIX)
    last_host = None
    for host in hosts:
        last_host = host
        labels_to_remove = [l for l in host.labels
                            if l.startswith(version_prefixes)]
        if labels_to_remove:
            AFE.run('host_remove_labels', id=host.id, labels=labels_to_remove)

    # Test repair work flow on shards, powerwash test will timeout after 7m.
    if use_shard and not create_and_return:
        # The last host returned by AFE is the one that gets powerwashed.
        # Fail with a clear message when there is none instead of hitting a
        # NameError on the loop variable.
        if last_host is None:
            raise TestPushException(
                    'No unlocked host of board %s found to test the repair '
                    'work flow.' % board)
        powerwash_dut_to_test_repair(last_host.hostname, timeout=420)

    current_dir = os.path.dirname(os.path.realpath(__file__))
    cmd = [os.path.join(current_dir, RUN_SUITE_COMMAND),
           '-s', suite_name,
           '-b', board,
           '-i', build,
           '-p', arguments.pool,
           '-u', str(arguments.num)]
    if create_and_return:
        cmd += ['-c']
    if testbed_test:
        cmd += ['--run_prod_code']

    suite_job_id = None

    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                            stderr=subprocess.STDOUT)

    while True:
        line = proc.stdout.readline()

        # Break when run_suite process completed.
        if not line and proc.poll() is not None:
            break
        print(line.rstrip())
        run_suite_output.append(line.rstrip())

        # Only the first line announcing a suite job is used.
        if not suite_job_id:
            m = re.match(SUITE_JOB_START_INFO_REGEX, line)
            if m and m.group(1):
                suite_job_id = int(m.group(1))
                all_suite_ids.append(suite_job_id)

    if not suite_job_id:
        raise TestPushException('Failed to retrieve suite job ID.')

    # If create_and_return specified, wait for the suite to finish.
    if create_and_return:
        end = time.time() + arguments.timeout_min * 60
        while not AFE.get_jobs(id=suite_job_id, finished=True):
            if time.time() < end:
                time.sleep(10)
            else:
                AFE.run('abort_host_queue_entries', job=suite_job_id)
                raise TestPushException(
                        'Asynchronous suite triggered by create_and_return '
                        'flag has timed out after %d mins. Aborting it.' %
                        arguments.timeout_min)

    print('Suite job %s is completed.' % suite_job_id)
    return suite_job_id
361
362
def check_dut_image(build, suite_job_id):
    """Verify every DUT used by the suite's child jobs has the expected build.

    @param build: Expected build to be imaged.
    @param suite_job_id: job ID of the suite job.
    @raise TestPushException: If a DUT does not have expected build imaged.
    """
    print('Checking image installed in DUTs...')
    child_job_ids = []
    for job in models.Job.objects.filter(parent_job_id=suite_job_id):
        child_job_ids.append(job.id)

    # Only the first host queue entry of each child job is looked at.
    first_hqes = []
    for job_id in child_job_ids:
        first_hqes.append(
                models.HostQueueEntry.objects.filter(job_id=job_id)[0])

    hostnames = set()
    for hqe in first_hqes:
        hostnames.add(hqe.host.hostname)

    for hostname in hostnames:
        found_build = site_utils.get_build_from_afe(hostname, AFE)
        if found_build == build:
            continue
        raise TestPushException('DUT is not imaged properly. Host %s has '
                                'build %s, while build %s is expected.' %
                                (hostname, found_build, build))
382
383
def test_suite(suite_name, expected_results, arguments, use_shard=False,
               create_and_return=False, testbed_test=False):
    """Run a suite through run_suite, then check DUT images and test results.

    @param suite_name: Name of a suite, e.g., dummy
    @param expected_results: A dictionary of test name to test result.
    @param arguments: Arguments for run_suite command.
    @param use_shard: If true, suite is scheduled for shard board.
    @param create_and_return: If True, run_suite just creates the suite, print
                              the job id, then finish immediately.
    @param testbed_test: True to run testbed test. Default is False.
    """
    suite_job_id = do_run_suite(suite_name, arguments, use_shard,
                                create_and_return, testbed_test)

    if testbed_test:
        build_expected = arguments.android_build
    else:
        build_expected = arguments.build

    # Confirm all DUTs used for the suite are imaged to expected build.
    # hqe.host_id for jobs running in shard is not synced back to master db,
    # therefore, skip verifying dut build for jobs running in shard. The AU
    # suite and testbed tests skip the check as well.
    skip_image_check = suite_name == AU_SUITE or use_shard or testbed_test
    if not skip_image_check:
        check_dut_image(build_expected, suite_job_id)

    # Verify test results are the expected results.
    verify_test_results(suite_job_id, expected_results)
409
410
def verify_test_results(job_id, expected_results):
    """Compare the TKO test results of a job against the expected results.

    Three kinds of problems are collected: tests whose status differs from
    the expected one, tests that match no expected pattern, and expected
    patterns that no test matched (except those in IGNORE_MISSING_TESTS).
    The link to the job's logs is also fetched to confirm it loads.

    @param job_id: id of the running jobs. For suite job, it is suite_job_id.
    @param expected_results: A dictionary of test name to test result.
    @raise TestPushException: If verify fails.
    """
    print('Comparing test results...')
    test_views = site_utils.get_test_views_from_tko(job_id, TKO)

    mismatch_errors = []
    extra_test_errors = []
    found_keys = set()

    for test_name, test_status in test_views.items():
        print('%s%s' % (test_name.ljust(30), test_status))
        # platform_InstallTestImage test may exist in old builds.
        if re.search('platform_InstallTestImage_SERVER_JOB$', test_name):
            continue

        # A test may match several expected patterns; each one is checked.
        matched_patterns = [pattern for pattern in expected_results
                            if re.search(pattern, test_name)]
        if not matched_patterns:
            extra_test_errors.append(test_name)
            continue

        found_keys.update(matched_patterns)
        for pattern in matched_patterns:
            expected_status = expected_results[pattern]
            if expected_status != test_status:
                mismatch_errors.append(
                        '%s Expected: [%s], Actual: [%s]' %
                        (test_name, expected_status, test_status))

    missing_test_errors = set(expected_results.keys()) - found_keys
    for ignored in IGNORE_MISSING_TESTS:
        missing_test_errors.discard(ignored)

    # Each non-empty category adds a header, its entries and a separator.
    sections = (
        ('Results of %d test(s) do not match expected values:',
         mismatch_errors),
        ('%d test(s) are not expected to be run:', extra_test_errors),
        ('%d test(s) are missing from the results:', missing_test_errors),
    )
    summary = []
    for header, errors in sections:
        if errors:
            summary.append(header % len(errors))
            summary.extend(errors)
            summary.append('\n')

    # Test link to log can be loaded.
    job_name = '%s-%s' % (job_id, getpass.getuser())
    log_link = URL_PATTERN % (URL_HOST, job_name)
    try:
        urllib2.urlopen(log_link).read()
    except urllib2.URLError:
        summary.append('Failed to load page for link to log: %s.' % log_link)

    if summary:
        raise TestPushException('\n'.join(summary))
478
479
def test_suite_wrapper(queue, suite_name, expected_results, arguments,
                       use_shard=False, create_and_return=False,
                       testbed_test=False):
    """Run test_suite and report any exception to the parent through a queue.

    @param queue: Queue to save exception to be accessed by parent process.
    @param suite_name: Name of a suite, e.g., dummy
    @param expected_results: A dictionary of test name to test result.
    @param arguments: Arguments for run_suite command.
    @param use_shard: If true, suite is scheduled for shard board.
    @param create_and_return: If True, run_suite just creates the suite, print
                              the job id, then finish immediately.
    @param testbed_test: True to run testbed test. Default is False.
    """
    try:
        test_suite(suite_name, expected_results, arguments,
                   use_shard=use_shard,
                   create_and_return=create_and_return,
                   testbed_test=testbed_test)
    except:
        # Everything is caught on purpose: whatever goes wrong is queued for
        # the parent instead of propagating here.
        # Store the whole exc_info leads to a PicklingError, so the traceback
        # object is replaced by its extracted form.
        failure = sys.exc_info()
        queue.put((failure[0], failure[1], traceback.extract_tb(failure[2])))
502
503
def check_queue(queue):
    """Check the queue for any exception being raised.

    Returns without doing anything when the queue is empty. Otherwise one
    entry is taken from the queue, its stack trace is printed and the
    exception is raised.

    @param queue: Queue used to store exception for parent process to access.
                  Each entry is an (exception type, exception value,
                  extracted traceback) tuple, as queued by
                  test_suite_wrapper.
    @raise: Any exception found in the queue.
    """
    if queue.empty():
        return
    except_type, except_value, extracted_tb = queue.get()
    # extracted_tb is the list produced by traceback.extract_tb; format it
    # like a regular traceback instead of printing the raw list.
    print('Original stack trace of the exception:\n%s' %
          ''.join(traceback.format_list(extracted_tb)))
    # Raise the original instance: building a new one from except_type fails
    # for exception classes whose constructor needs more than one argument.
    if isinstance(except_value, BaseException):
        raise except_value
    raise except_type(except_value)
516
517
def get_head_of_repos(repos):
    """Get HEAD of updated repos, currently are autotest and chromite repos

    @param repos: a map of repo name to the path of the repo. E.g.
                  {'autotest': '/usr/local/autotest'}
    @return: a map of repo names to the current HEAD of that repo.
    """
    updated_repo_heads = {}
    for repo_name, path_to_repo in repos.items():
        # Run git inside the repo through cwd= and an argv list, so neither
        # the working directory of this process nor a shell is involved.
        head = subprocess.check_output(
                ['git', 'rev-parse', 'HEAD'],
                cwd=os.path.expanduser(path_to_repo)).strip()
        updated_repo_heads[repo_name] = head
    return updated_repo_heads
545
546
def push_prod_next_branch(updated_repo_heads):
    """push prod-next branch to the tested HEAD after all tests pass.

    The push command must be ran as PUSH_USER, since only PUSH_USER has the
    right to push branches.

    @param updated_repo_heads: a map of repo names to tested HEAD of that repo.
    @raise subprocess.CalledProcessError: if any step of the push command
                                          fails for a repo.
    """
    # prod-next branch for every repo is downloaded under PUSH_USER home dir.
    # The steps are chained with && so that a failed cd or rebase stops the
    # sequence and is reported by check_output, rather than being masked by
    # the exit status of a later git push.
    cmd = ('cd ~/{repo} && git rebase {hash} prod-next && '
           'git push origin prod-next')
    run_push_as_push_user = "sudo su - %s -c '%s'" % (PUSH_USER, cmd)

    for repo_name, test_hash in updated_repo_heads.items():
        push_cmd = run_push_as_push_user.format(hash=test_hash, repo=repo_name)
        print('Pushing %s prod-next branch to %s' % (repo_name, test_hash))
        print(subprocess.check_output(push_cmd, stderr=subprocess.STDOUT,
                                      shell=True))
564
565
def main():
    """Entry point for test_push script.

    Starts the push_to_prod, AU, dummy (asynchronous) and testbed suites in
    separate processes, waits for all of them, and reports the outcome by
    print and, when an email address is set, by email. The push DUTs are
    reverified whether the run passed or failed.
    """
    arguments = parse_arguments()
    updated_repo_heads = get_head_of_repos(UPDATED_REPOS)
    updated_repo_msg = '\n'.join(
            '%s: %s' % (name, head)
            for name, head in updated_repo_heads.items())

    try:
        # Use daemon flag will kill child processes when parent process fails.
        use_daemon = not arguments.continue_on_failure
        check_dut_inventory(arguments.num_duts)
        queue = multiprocessing.Queue()

        # One entry per suite, in start order: (suite name, expected results,
        # trailing positional arguments of test_suite_wrapper, daemon flag).
        suite_specs = [
            (PUSH_TO_PROD_SUITE, EXPECTED_TEST_RESULTS, (), use_daemon),
            # TODO(dshi): Remove following line after crbug.com/267644 is
            # fixed. Also, merge EXPECTED_TEST_RESULTS_AU to
            # EXPECTED_TEST_RESULTS
            # AU suite will be on shard until crbug.com/634049 is fixed.
            (AU_SUITE, EXPECTED_TEST_RESULTS_AU, (True,), use_daemon),
            # suite test with --create_and_return flag
            (DUMMY_SUITE, EXPECTED_TEST_RESULTS_DUMMY, (False, True), True),
            # Test suite for testbed
            (TESTBED_SUITE, EXPECTED_TEST_RESULTS_TESTBED,
             (False, False, True), use_daemon),
        ]

        suite_procs = []
        for suite_name, expected, extra_args, daemon in suite_specs:
            proc = multiprocessing.Process(
                    target=test_suite_wrapper,
                    args=(queue, suite_name, expected, arguments) + extra_args)
            proc.daemon = daemon
            proc.start()
            suite_procs.append(proc)

        # Surface a child's exception as soon as it is queued.
        while any(proc.is_alive() for proc in suite_procs):
            check_queue(queue)
            time.sleep(5)

        check_queue(queue)

        for proc in suite_procs:
            proc.join()

        # All tests pass, push prod-next branch for UPDATED_REPOS.
        #push_prod_next_branch(updated_repo_heads)
    except Exception as e:
        print('Test for pushing to prod failed:\n')
        print(str(e))
        # Abort running jobs when choose not to continue when there is failure.
        if not arguments.continue_on_failure:
            for suite_id in all_suite_ids:
                if AFE.get_jobs(id=suite_id, finished=False):
                    AFE.run('abort_host_queue_entries', job=suite_id)
        # Send out email about the test failure.
        if arguments.email:
            failure_body = (
                    'Test CLs of the following repos failed. Below are the '
                    'repos and the corresponding test HEAD.\n\n%s\n\n.'
                    'Error occurred during test:\n\n%s\n\n' %
                    (updated_repo_msg, str(e)) + '\n'.join(run_suite_output))
            gmail_lib.send_email(
                    arguments.email,
                    'Test for pushing to prod failed. Do NOT push!',
                    failure_body)
        raise
    finally:
        # Reverify all the hosts
        reverify_all_push_duts(arguments.pool)

    message = ('\nAll tests are completed successfully, the prod branch of the '
               'following repos ready to be pushed to the hash list below.\n'
               '%s' % updated_repo_msg)
    print(message)
    # Send out email about test completed successfully.
    if arguments.email:
        gmail_lib.send_email(
                arguments.email,
                'Test for pushing to prod completed successfully',
                message)
Dan Shi7e04fa82013-07-25 15:08:48 -0700658
659
660if __name__ == '__main__':
661 sys.exit(main())