Dan Shi | 7e04fa8 | 2013-07-25 15:08:48 -0700 | [diff] [blame] | 1 | #!/usr/bin/python |
| 2 | # |
| 3 | # Copyright (c) 2013 The Chromium OS Authors. All rights reserved. |
| 4 | # Use of this source code is governed by a BSD-style license that can be |
| 5 | # found in the LICENSE file. |
| 6 | |
| 7 | """Tool to validate code in prod branch before pushing to lab. |
| 8 | |
| 9 | The script runs push_to_prod suite to verify code in prod branch is ready to be |
| 10 | pushed. Link to design document: |
| 11 | https://docs.google.com/a/google.com/document/d/1JMz0xS3fZRSHMpFkkKAL_rxsdbNZomhHbC3B8L71uuI/edit |
| 12 | |
To verify whether the prod branch can be pushed to the lab, run the following
command on the chromeos-staging-master2.hot server:
Michael Liang | 52d9f1f | 2014-06-17 15:01:24 -0700 | [diff] [blame] | 15 | /usr/local/autotest/site_utils/test_push.py -e someone@company.com |
Dan Shi | 7e04fa8 | 2013-07-25 15:08:48 -0700 | [diff] [blame] | 16 | |
The script uses the latest gandof stable build as the test build by default.
Dan Shi | 7e04fa8 | 2013-07-25 15:08:48 -0700 | [diff] [blame] | 18 | |
| 19 | """ |
| 20 | |
| 21 | import argparse |
Shuqian Zhao | 1f311c0 | 2016-09-01 19:30:54 -0700 | [diff] [blame] | 22 | import ast |
Shuqian Zhao | 7b2daea | 2016-10-25 13:31:06 -0700 | [diff] [blame] | 23 | from contextlib import contextmanager |
Shuqian Zhao | 0de876d | 2018-01-31 11:53:34 -0800 | [diff] [blame] | 24 | import datetime |
Dan Shi | 7e04fa8 | 2013-07-25 15:08:48 -0700 | [diff] [blame] | 25 | import getpass |
Dan Shi | ef1a5c0 | 2015-04-07 17:37:09 -0700 | [diff] [blame] | 26 | import multiprocessing |
Dan Shi | 7e04fa8 | 2013-07-25 15:08:48 -0700 | [diff] [blame] | 27 | import os |
| 28 | import re |
| 29 | import subprocess |
| 30 | import sys |
Dan Shi | ef1a5c0 | 2015-04-07 17:37:09 -0700 | [diff] [blame] | 31 | import time |
| 32 | import traceback |
Dan Shi | 7e04fa8 | 2013-07-25 15:08:48 -0700 | [diff] [blame] | 33 | import urllib2 |
| 34 | |
| 35 | import common |
Dan Shi | a8da760 | 2014-05-09 15:18:15 -0700 | [diff] [blame] | 36 | try: |
| 37 | from autotest_lib.frontend import setup_django_environment |
| 38 | from autotest_lib.frontend.afe import models |
Shuqian Zhao | 327b695 | 2016-09-12 10:42:03 -0700 | [diff] [blame] | 39 | from autotest_lib.frontend.afe import rpc_utils |
Dan Shi | a8da760 | 2014-05-09 15:18:15 -0700 | [diff] [blame] | 40 | except ImportError: |
| 41 | # Unittest may not have Django database configured and will fail to import. |
| 42 | pass |
Dan Shi | 5fa602c | 2015-03-26 17:54:13 -0700 | [diff] [blame] | 43 | from autotest_lib.client.common_lib import global_config |
Shuqian Zhao | 327b695 | 2016-09-12 10:42:03 -0700 | [diff] [blame] | 44 | from autotest_lib.client.common_lib import priorities |
Shuqian Zhao | f239b31 | 2017-12-05 16:45:02 -0800 | [diff] [blame] | 45 | from autotest_lib.client.common_lib.cros import retry |
Prathmesh Prabhu | cd246f5 | 2018-01-03 13:45:48 -0800 | [diff] [blame] | 46 | from autotest_lib.frontend.afe import rpc_client_lib |
Xixuan Wu | 93e646c | 2017-12-07 18:36:10 -0800 | [diff] [blame] | 47 | from autotest_lib.server import constants |
Dan Shi | 7e04fa8 | 2013-07-25 15:08:48 -0700 | [diff] [blame] | 48 | from autotest_lib.server import site_utils |
Shuqian Zhao | 327b695 | 2016-09-12 10:42:03 -0700 | [diff] [blame] | 49 | from autotest_lib.server import utils |
Dan Shi | 47d3288 | 2014-12-22 16:25:05 -0800 | [diff] [blame] | 50 | from autotest_lib.server.cros import provision |
Dan Shi | 7e04fa8 | 2013-07-25 15:08:48 -0700 | [diff] [blame] | 51 | from autotest_lib.server.cros.dynamic_suite import frontend_wrappers |
Dan Shi | 7e04fa8 | 2013-07-25 15:08:48 -0700 | [diff] [blame] | 52 | |
Shuqian Zhao | 5696954 | 2017-05-30 12:56:57 -0700 | [diff] [blame] | 53 | try: |
| 54 | from chromite.lib import metrics |
| 55 | from chromite.lib import ts_mon_config |
| 56 | except ImportError: |
| 57 | metrics = site_utils.metrics_mock |
| 58 | ts_mon_config = site_utils.metrics_mock |
| 59 | |
AUTOTEST_DIR = common.autotest_dir
CONFIG = global_config.global_config

# AFE and TKO RPC clients shared by the functions in this module.
AFE = frontend_wrappers.RetryingAFE(timeout_min=0.5, delay_sec=2)
TKO = frontend_wrappers.RetryingTKO(timeout_min=0.1, delay_sec=10)

MAIL_FROM = 'chromeos-test@google.com'
# Regular expressions are written as raw strings so that backslash escapes
# such as \d and \. reach the `re` module untouched.
BUILD_REGEX = r'R[\d]+-[\d]+\.[\d]+\.[\d]+'
RUN_SUITE_COMMAND = 'run_suite.py'
PUSH_TO_PROD_SUITE = 'push_to_prod'
DUMMY_SUITE = 'dummy'
DEFAULT_TIMEOUT_MIN_FOR_SUITE_JOB = 30
IMAGE_BUCKET = CONFIG.get_config_value('CROS', 'image_storage_server')
# (board, required number of DUTs) pairs; turned into a dict to serve as the
# default value of the --num_duts command line option.
DEFAULT_NUM_DUTS = (
    ('gandof', 4),
    ('quawks', 2),
)

# Matches the run_suite output line announcing the suite job; group 1
# captures the suite job id.
SUITE_JOB_START_INFO_REGEX = (r'^.*Created suite job:.*'
                              r'tab_id=view_job&object_id=(\d+)$')
Dan Shi | 7e04fa8 | 2013-07-25 15:08:48 -0700 | [diff] [blame] | 80 | |
# Expected results of the push_to_prod suite. Each key is a regular
# expression searched for in the test name; each value is the status that a
# matching test must report.
EXPECTED_TEST_RESULTS = {
    '^SERVER_JOB$': 'GOOD',
    # This is related to dummy_Fail/control.dependency.
    'dummy_Fail.dependency$': 'TEST_NA',
    'login_LoginSuccess.*': 'GOOD',
    'provision_AutoUpdate.double': 'GOOD',
    'dummy_Pass.*': 'GOOD',
    'dummy_Fail.Fail$': 'FAIL',
    'dummy_Fail.RetryFail$': 'FAIL',
    'dummy_Fail.RetrySuccess': 'GOOD',
    'dummy_Fail.Error$': 'ERROR',
    'dummy_Fail.Warn$': 'WARN',
    'dummy_Fail.NAError$': 'TEST_NA',
    'dummy_Fail.Crash$': 'GOOD',
    'autotest_SyncCount$': 'GOOD',
}

# Expected results of the dummy suite, keyed the same way.
EXPECTED_TEST_RESULTS_DUMMY = {
    '^SERVER_JOB$': 'GOOD',
    'dummy_Pass.*': 'GOOD',
    'dummy_Fail.Fail': 'FAIL',
    'dummy_Fail.Warn': 'WARN',
    'dummy_Fail.Crash': 'GOOD',
    'dummy_Fail.Error': 'ERROR',
    'dummy_Fail.NAError': 'TEST_NA',
}

# Expected results of the powerwash job used to test the repair workflow.
EXPECTED_TEST_RESULTS_POWERWASH = {
    'platform_Powerwash': 'GOOD',
    'SERVER_JOB': 'GOOD',
}
| 108 | |
# Server host name and log URL pattern, both read from the global config.
# They are combined to build the link to a job's logs.
URL_HOST = CONFIG.get_config_value('SERVER', 'hostname', type=str)
URL_PATTERN = CONFIG.get_config_value('CROS', 'log_url_pattern', type=str)

# Tests that may be missing from the test results for various reasons. When
# adding a test to this list, explain the reason in a comment.
IGNORE_MISSING_TESTS = [
    # For latest build, npo_test_delta does not exist.
    'autoupdate_EndToEndTest.npo_test_delta.*',
    # For trybot build, nmo_test_delta does not exist.
    'autoupdate_EndToEndTest.nmo_test_delta.*',
    # Older build does not have login_LoginSuccess test in push_to_prod
    # suite.
    # TODO(dshi): Remove following lines after R41 is stable.
    'login_LoginSuccess',
]

# Data shared between the background suite-running processes and the main
# process. These start out as plain lists; the multiprocessing-compatible
# proxy objects that replace them are initialized in _main.
_run_suite_output = []
_all_suite_ids = []

# Maps the name of each updated repo to its path.
UPDATED_REPOS = {
    'autotest': AUTOTEST_DIR,
    'chromite': '%s/site-packages/chromite/' % AUTOTEST_DIR,
}
PUSH_USER = 'chromeos-test-lab'

DEFAULT_SERVICE_RESPAWN_LIMIT = 2
| 135 | |
| 136 | |
class TestPushException(Exception):
    """Raised when a step of the push-to-prod test fails."""
| 140 | |
@retry.retry(TestPushException, timeout_min=5, delay_sec=30)
def check_dut_inventory(required_num_duts, pool):
    """Check DUT inventory for each board in the pool specified.

    Only hosts that are unlocked, in status Ready and labelled with the given
    pool are counted.

    @param required_num_duts: a dict specifying the number of DUT each platform
                              requires in order to finish push tests.
    @param pool: the pool used by test_push.
    @raise TestPushException: if number of DUTs are less than the requirement.
    """
    print('Checking DUT inventory...')
    pool_label = constants.Labels.POOL_PREFIX + pool
    hosts = AFE.run('get_hosts', status='Ready', locked=False)

    # Count the usable hosts of each platform in a single pass.
    current_inventory = {}
    for host in hosts:
        if pool_label not in host.get('labels', []):
            continue
        host_platform = host['platform']
        current_inventory[host_platform] = (
                current_inventory.get(host_platform, 0) + 1)

    # Collect one message per platform that is short of DUTs so that all
    # shortages are reported together.
    shortages = []
    for platform, req_num in required_num_duts.items():
        curr_num = current_inventory.get(platform, 0)
        if curr_num < req_num:
            shortages.append(
                    '\nRequire %d %s DUTs in pool: %s, only %d are Ready'
                    ' now' % (req_num, platform, pool, curr_num))
    if shortages:
        raise TestPushException('Not enough DUTs to run push tests. %s' %
                                ''.join(shortages))
| 165 | |
Dan Shi | 5ba5d2e | 2014-05-09 13:47:00 -0700 | [diff] [blame] | 166 | |
def powerwash_dut_to_test_repair(hostname, timeout):
    """Run platform_Powerwash on a DUT, then reverify it to exercise repair.

    A 'powerwash' server job is created for the host and polled every 10
    seconds until TKO reports test statuses for it. The results are then
    checked against EXPECTED_TEST_RESULTS_POWERWASH and a reverify of the
    host is requested.

    @param hostname: hostname of the dut.
    @param timeout: seconds to wait for the powerwash test before aborting.
    @raise TestPushException: if the powerwash test times out or its results
                              do not match the expected results.
    """
    powerwash_test = models.Test.objects.get(name='platform_Powerwash')
    control_path = os.path.join(common.autotest_dir, powerwash_test.path)
    control_file = utils.read_file(control_path)
    job_id = rpc_utils.create_job_common(
            'powerwash',
            priority=priorities.Priority.SUPER,
            control_type='Server',
            control_file=control_file,
            hosts=[hostname])

    deadline = time.time() + timeout
    while not TKO.get_job_test_statuses_from_db(job_id):
        if time.time() < deadline:
            time.sleep(10)
            continue
        # Out of time: abort the job before reporting the failure.
        AFE.run('abort_host_queue_entries', job=job_id)
        raise TestPushException(
                'Powerwash test on %s timeout after %ds, abort it.' %
                (hostname, timeout))

    verify_test_results(job_id, EXPECTED_TEST_RESULTS_POWERWASH)
    # Kick off verify, verify will fail and a repair should be triggered.
    AFE.reverify_hosts(hostnames=[hostname])
Kevin Cheng | 6e4c264 | 2015-12-11 09:45:57 -0800 | [diff] [blame] | 191 | |
| 192 | |
def reverify_all_push_duts():
    """Request a reverify of every host returned by the AFE."""
    print('Reverifying all DUTs.')
    hostnames = []
    for host in AFE.get_hosts():
        hostnames.append(host.hostname)
    AFE.reverify_hosts(hostnames=hostnames)
| 198 | |
| 199 | |
def parse_arguments(argv):
    """Parse arguments for test_push tool.

    When no build (or shard build) is given on the command line, it defaults
    to the image name that the AFE stable version map returns for the
    corresponding board.

    @param argv   Argument vector, as for `sys.argv`, including the
                  command name in `argv[0]`.
    @return: Parsed arguments.

    """
    parser = argparse.ArgumentParser(prog=argv[0])
    parser.add_argument('-b', '--board', dest='board', default='gandof',
                        help='Default is gandof.')
    parser.add_argument('-sb', '--shard_board', dest='shard_board',
                        default='quawks',
                        help='Default is quawks.')
    parser.add_argument('-i', '--build', dest='build', default=None,
                        help='Default is the latest stable build of given '
                             'board. Must be a stable build, otherwise AU test '
                             'will fail. (ex: gandof-release/R54-8743.25.0)')
    parser.add_argument('-si', '--shard_build', dest='shard_build',
                        default=None,
                        help='Default is the latest stable build of given '
                             'board. Must be a stable build, otherwise AU test '
                             'will fail.')
    parser.add_argument('-p', '--pool', dest='pool', default='bvt')
    parser.add_argument('-t', '--timeout_min', dest='timeout_min', type=int,
                        default=DEFAULT_TIMEOUT_MIN_FOR_SUITE_JOB,
                        help='Time in mins to wait before abort the jobs we '
                             'are waiting on. Only for the asynchronous suites '
                             'triggered by create_and_return flag.')
    # The value is parsed with ast.literal_eval, so it must be a Python
    # literal.
    parser.add_argument('-ud', '--num_duts', dest='num_duts',
                        default=dict(DEFAULT_NUM_DUTS),
                        type=ast.literal_eval,
                        help="Python dict literal that specifies the required"
                        " number of DUTs for each board. E.g {'gandof':4}")
    parser.add_argument('-c', '--continue_on_failure', action='store_true',
                        dest='continue_on_failure',
                        help='All tests continue to run when there is failure')
    parser.add_argument('-sl', '--service_respawn_limit', type=int,
                        default=DEFAULT_SERVICE_RESPAWN_LIMIT,
                        help='If a service crashes more than this, the test '
                             'push is considered failed.')

    arguments = parser.parse_args(argv[1:])

    # Get latest stable build as default build.
    version_map = AFE.get_stable_version_map(AFE.CROS_IMAGE_TYPE)
    if not arguments.build:
        arguments.build = version_map.get_image_name(arguments.board)
    if not arguments.shard_build:
        arguments.shard_build = version_map.get_image_name(
                arguments.shard_board)
    return arguments
| 251 | |
| 252 | |
def do_run_suite(suite_name, arguments, use_shard=False,
                 create_and_return=False):
    """Call run_suite to run a suite job, and return the suite job id.

    The function waits for the suite job to finish before returning the suite
    job id. It echoes the run_suite output to stdout, appends every output
    line to _run_suite_output and appends the suite job id to _all_suite_ids.

    @param suite_name: Name of a suite, e.g., dummy.
    @param arguments: Arguments for run_suite command.
    @param use_shard: If true, suite is scheduled for shard board.
    @param create_and_return: If True, run_suite just creates the suite, print
                              the job id, then finish immediately.

    @return: Suite job ID.
    @raise TestPushException: If no DUT is available for the powerwash repair
                              test, if the suite job id cannot be found in
                              the run_suite output, or if an asynchronous
                              suite does not finish within
                              arguments.timeout_min minutes.

    """
    if use_shard:
        board = arguments.shard_board
        build = arguments.shard_build
    else:
        board = arguments.board
        build = arguments.build

    # Remove cros-version label to force provision.
    hosts = AFE.get_hosts(label=constants.Labels.BOARD_PREFIX + board,
                          locked=False)
    last_host = None
    for host in hosts:
        last_host = host
        labels_to_remove = [
                l for l in host.labels
                if l.startswith(provision.CROS_VERSION_PREFIX)]
        if labels_to_remove:
            AFE.run('host_remove_labels', id=host.id, labels=labels_to_remove)

    # Test repair work flow on shards, powerwash test will timeout after 7m.
    if use_shard and not create_and_return:
        # The powerwash test runs on the last host of the board. Fail with a
        # clear error instead of a NameError when the board has no host.
        if last_host is None:
            raise TestPushException(
                    'No unlocked DUT found for board %s to test the repair '
                    'workflow.' % board)
        powerwash_dut_to_test_repair(last_host.hostname, timeout=420)

    current_dir = os.path.dirname(os.path.realpath(__file__))
    cmd = [os.path.join(current_dir, RUN_SUITE_COMMAND),
           '-s', suite_name,
           '-b', board,
           '-i', build,
           '-p', arguments.pool,
           '--minimum_duts', str(arguments.num_duts[board])]
    if create_and_return:
        cmd += ['-c']

    suite_job_id = None

    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                            stderr=subprocess.STDOUT)

    # readline() returns '' only at end of file, so the loop ends once
    # run_suite has closed its output; blank output lines arrive as '\n'.
    for line in iter(proc.stdout.readline, ''):
        stripped_line = line.rstrip()
        print(stripped_line)
        _run_suite_output.append(stripped_line)

        if not suite_job_id:
            m = re.match(SUITE_JOB_START_INFO_REGEX, line)
            if m and m.group(1):
                suite_job_id = int(m.group(1))
                _all_suite_ids.append(suite_job_id)

    # Release the pipe and wait for the run_suite process to exit.
    proc.stdout.close()
    proc.wait()

    if not suite_job_id:
        raise TestPushException('Failed to retrieve suite job ID.')

    # If create_and_return specified, wait for the suite to finish.
    if create_and_return:
        end = time.time() + arguments.timeout_min * 60
        while not AFE.get_jobs(id=suite_job_id, finished=True):
            if time.time() < end:
                time.sleep(10)
            else:
                AFE.run('abort_host_queue_entries', job=suite_job_id)
                raise TestPushException(
                        'Asynchronous suite triggered by create_and_return '
                        'flag has timed out after %d mins. Aborting it.' %
                        arguments.timeout_min)

    print('Suite job %s is completed.' % suite_job_id)
    return suite_job_id
| 338 | |
| 339 | |
def check_dut_image(build, suite_job_id):
    """Confirm all DUTs used for the suite are imaged to expected build.

    The DUTs are taken from the first host queue entry of each child job of
    the suite job.

    @param build: Expected build to be imaged.
    @param suite_job_id: job ID of the suite job.
    @raise TestPushException: If a DUT does not have expected build imaged.
    """
    print('Checking image installed in DUTs...')
    child_jobs = models.Job.objects.filter(parent_job_id=suite_job_id)
    child_job_ids = [child.id for child in child_jobs]
    first_hqes = [models.HostQueueEntry.objects.filter(job_id=child_id)[0]
                  for child_id in child_job_ids]

    # A DUT may have run several child jobs; check each one only once.
    hostnames = {hqe.host.hostname for hqe in first_hqes}
    for hostname in hostnames:
        found_build = site_utils.get_build_from_afe(hostname, AFE)
        if found_build == build:
            continue
        raise TestPushException('DUT is not imaged properly. Host %s has '
                                'build %s, while build %s is expected.' %
                                (hostname, found_build, build))
Dan Shi | a8da760 | 2014-05-09 15:18:15 -0700 | [diff] [blame] | 359 | |
| 360 | |
def test_suite(suite_name, expected_results, arguments, use_shard=False,
               create_and_return=False):
    """Run a suite job through run_suite and verify its results.

    @param suite_name: Name of a suite, e.g., dummy
    @param expected_results: A dictionary of test name to test result.
    @param arguments: Arguments for run_suite command.
    @param use_shard: If true, suite is scheduled for shard board.
    @param create_and_return: If True, run_suite just creates the suite, print
                              the job id, then finish immediately.
    """
    suite_job_id = do_run_suite(suite_name, arguments,
                                use_shard=use_shard,
                                create_and_return=create_and_return)

    # Confirm all DUTs used for the suite are imaged to expected build.
    # hqe.host_id for jobs running in shard is not synced back to master db,
    # therefore, skip verifying dut build for jobs running in shard.
    expected_build = arguments.build
    if not use_shard:
        check_dut_image(expected_build, suite_job_id)

    # Verify test results are the expected results.
    verify_test_results(suite_job_id, expected_results)
| 384 | |
| 385 | |
def verify_test_results(job_id, expected_results):
    """Verify the test results with the expected results.

    Three kinds of problems are reported: tests whose status differs from
    the expected one, tests that ran but match no expected entry, and
    expected entries that no test matched. The link to the job logs is also
    loaded to confirm that it works.

    @param job_id: id of the running jobs. For suite job, it is suite_job_id.
    @param expected_results: A dictionary of test name to test result. The
                             keys are regular expressions searched for in
                             the test names.
    @raise TestPushException: If verify fails.
    """
    print('Comparing test results...')
    test_views = site_utils.get_test_views_from_tko(job_id, TKO)

    mismatch_errors = []
    extra_test_errors = []

    found_keys = set()
    for test_name, test_status in test_views.items():
        print('%s%s' % (test_name.ljust(30), test_status))
        # platform_InstallTestImage test may exist in old builds.
        if re.search('platform_InstallTestImage_SERVER_JOB$', test_name):
            continue
        test_found = False
        for key, val in expected_results.items():
            if not re.search(key, test_name):
                continue
            test_found = True
            found_keys.add(key)
            if val != test_status:
                error = ('%s Expected: [%s], Actual: [%s]' %
                         (test_name, val, test_status))
                mismatch_errors.append(error)
        if not test_found:
            extra_test_errors.append(test_name)

    # NOTE(review): entries of IGNORE_MISSING_TESTS are compared with the
    # expected_results keys by exact string equality, not as patterns. An
    # entry such as 'login_LoginSuccess' therefore does not cover a key such
    # as 'login_LoginSuccess.*' -- confirm that this is intended.
    missing_test_errors = set(expected_results) - found_keys
    missing_test_errors.difference_update(IGNORE_MISSING_TESTS)

    summary = []
    if mismatch_errors:
        summary.append(('Results of %d test(s) do not match expected '
                        'values:') % len(mismatch_errors))
        summary.extend(mismatch_errors)
        summary.append('\n')

    if extra_test_errors:
        summary.append('%d test(s) are not expected to be run:' %
                       len(extra_test_errors))
        summary.extend(extra_test_errors)
        summary.append('\n')

    if missing_test_errors:
        summary.append('%d test(s) are missing from the results:' %
                       len(missing_test_errors))
        # Sorted so that the report is the same from one run to the next.
        summary.extend(sorted(missing_test_errors))
        summary.append('\n')

    # Test link to log can be loaded.
    job_name = '%s-%s' % (job_id, getpass.getuser())
    log_link = URL_PATTERN % (rpc_client_lib.add_protocol(URL_HOST), job_name)
    try:
        response = urllib2.urlopen(log_link)
        try:
            response.read()
        finally:
            # Always release the connection, even if reading fails.
            response.close()
    except urllib2.URLError:
        summary.append('Failed to load page for link to log: %s.' % log_link)

    if summary:
        raise TestPushException('\n'.join(summary))
| 453 | |
| 454 | |
def test_suite_wrapper(queue, suite_name, expected_results, arguments,
                       use_shard=False, create_and_return=False):
    """Run test_suite and hand any exception it raises to the parent process.

    @param queue: Queue to save exception to be accessed by parent process.
    @param suite_name: Name of a suite, e.g., dummy
    @param expected_results: A dictionary of test name to test result.
    @param arguments: Arguments for run_suite command.
    @param use_shard: If true, suite is scheduled for shard board.
    @param create_and_return: If True, run_suite just creates the suite, print
                              the job id, then finish immediately.
    """
    try:
        test_suite(suite_name, expected_results, arguments,
                   use_shard=use_shard,
                   create_and_return=create_and_return)
    except Exception:
        # Putting the whole exc_info in the queue leads to a PicklingError,
        # so the traceback is replaced by its extracted entries.
        failure_type, failure_value, failure_tb = sys.exc_info()
        failure = (failure_type, failure_value,
                   traceback.extract_tb(failure_tb))
        queue.put(failure)
| 475 | |
| 476 | |
def check_queue(queue):
    """Re-raise the first exception reported on the queue, if there is one.

    Each queue entry is expected to be an (exception type, exception value,
    extracted traceback) tuple, where the extracted traceback is the list
    produced by traceback.extract_tb.

    @param queue: Queue used to store exception for parent process to access.
    @raise: Any exception found in the queue.
    """
    if queue.empty():
        return
    except_type, except_value, stack = queue.get()
    # The original traceback object is not available here, so show the
    # extracted entries in the usual "File ..., line ..." layout.
    print('Original stack trace of the exception:\n%s' %
          ''.join(traceback.format_list(stack)))
    # Re-raise the original instance when there is one: rebuilding it as
    # except_type(except_value) fails for exception classes whose constructor
    # requires more than one argument.
    if isinstance(except_value, BaseException):
        raise except_value
    raise except_type(except_value)
| 489 | |
| 490 | |
def get_head_of_repos(repos):
    """Get HEAD of updated repos, currently are autotest and chromite repos

    @param repos: a map of repo name to the path of the repo. E.g.
                  {'autotest': '/usr/local/autotest'}
                  A leading '~' in a path is expanded.
    @return: a map of repo names to the current HEAD of that repo.
    @raise OSError: if a repo path cannot be entered or git cannot be run.
    @raise subprocess.CalledProcessError: if git exits with an error.
    """
    updated_repo_heads = {}
    for repo_name, path_to_repo in repos.items():
        # Run git inside the repo through cwd= and an argument list, so the
        # working directory of this process is never changed and no shell is
        # involved.
        head = subprocess.check_output(
                ['git', 'rev-parse', 'HEAD'],
                cwd=os.path.expanduser(path_to_repo))
        updated_repo_heads[repo_name] = head.strip()
    return updated_repo_heads
| 518 | |
| 519 | |
def push_prod_next_branch(updated_repo_heads):
    """Move the prod-next branch of each repo to its tested HEAD and push it.

    The push command must be ran as PUSH_USER, since only PUSH_USER has the
    right to push branches.

    @param updated_repo_heads: a map of repo names to tested HEAD of that repo.
    """
    # prod-next branch for every repo is downloaded under PUSH_USER home dir.
    git_steps = ('cd ~/{repo}; git pull; git rebase {hash} prod-next;'
                 'git push origin prod-next')
    # The {repo} and {hash} placeholders are filled in per repo below.
    command_template = "sudo su - %s -c '%s'" % (PUSH_USER, git_steps)

    for repo_name, test_hash in updated_repo_heads.iteritems():
        repo_command = command_template.format(repo=repo_name, hash=test_hash)
        print('Pushing %s prod-next branch to %s' % (repo_name, test_hash))
        push_output = subprocess.check_output(
                repo_command, stderr=subprocess.STDOUT, shell=True)
        print(push_output)
| 538 | |
| 539 | |
def _run_test_suites(arguments):
    """Run the actual tests that comprise the test_push.

    Two suites are started in separate processes; the parent polls the shared
    queue for exceptions reported by either of them until both have exited.

    @param arguments: command line arguments.
    """
    # Use daemon flag will kill child processes when parent process fails.
    use_daemon = not arguments.continue_on_failure
    queue = multiprocessing.Queue()

    def _start_suite(daemon, *suite_args):
        """Start test_suite_wrapper in a new process and return the process."""
        worker = multiprocessing.Process(target=test_suite_wrapper,
                                         args=(queue,) + suite_args)
        worker.daemon = daemon
        worker.start()
        return worker

    push_to_prod_suite = _start_suite(
            use_daemon, PUSH_TO_PROD_SUITE, EXPECTED_TEST_RESULTS, arguments)
    # suite test with --create_and_return flag
    asynchronous_suite = _start_suite(
            True, DUMMY_SUITE, EXPECTED_TEST_RESULTS_DUMMY, arguments,
            True, True)

    suites = (push_to_prod_suite, asynchronous_suite)
    while any(suite.is_alive() for suite in suites):
        check_queue(queue)
        time.sleep(5)
    # One more look for a failure reported after the last poll.
    check_queue(queue)
    for suite in suites:
        suite.join()
| 567 | |
| 568 | |
def check_service_crash(respawn_limit, start_time):
    """Check whether scheduler or host_scheduler crashed during testing.

    The number of respawns of a service is taken to be the number of
    '<service>.log.<timestamp>' files in the autotest logs directory whose
    timestamp lies between start_time and now. A service is reported when
    that number is over respawn_limit.

    @param respawn_limit: The maximum number of times the service is allowed to
                          be respawned.
    @param start_time: The time that testing push is kicked off.
    @raise TestPushException: if any service was respawned more than
                              respawn_limit times.
    """
    def _parse(filename_prefix, filename):
        """Helper method to parse the time of the log.

        @param filename_prefix: The prefix of the filename.
        @param filename: The name of the log file.
        @return: The timestamp encoded in the file name, or None when the
                 part after the prefix is not a timestamp.
        """
        try:
            return datetime.datetime.strptime(filename[len(filename_prefix):],
                                              "%Y-%m-%d-%H.%M.%S")
        except ValueError:
            # A file that merely shares the prefix must not abort the check.
            return None

    services = ['scheduler', 'host_scheduler']
    logs = os.listdir('%s/logs/' % AUTOTEST_DIR)
    curr_time = datetime.datetime.now()

    errors = []
    for service in services:
        log_prefix = '%s.log.' % service
        respawn_count = 0
        for log_name in logs:
            if not log_name.startswith(log_prefix):
                continue
            log_time = _parse(log_prefix, log_name)
            if log_time is not None and start_time <= log_time <= curr_time:
                respawn_count += 1

        if respawn_count > respawn_limit:
            errors.append(
                    '%s has been respawned %s times during testing push at %s. '
                    'It is very likely crashed. Please check!\n' %
                    (service, respawn_count,
                     start_time.strftime("%Y-%m-%d-%H")))
    if errors:
        raise TestPushException(''.join(errors))
| 606 | |
| 607 | |
def _promote_prod_next_refs():
    """Push prod-next to the current HEAD of every repo in UPDATED_REPOS.

    @return: a map of repo names to the HEAD that prod-next was pushed to.
    """
    tested_heads = get_head_of_repos(UPDATED_REPOS)
    push_prod_next_branch(tested_heads)
    return tested_heads
| 613 | |
| 614 | |
# Message printed by _main after every check has passed. The
# 'updated_repos_msg' placeholder is filled with one "<repo>: <hash>" line per
# updated repo.
_SUCCESS_MSG = """
All tests completed successfully, the prod branch of the following repos is
ready to be pushed to the hash list below.

%(updated_repos_msg)s

Instructions for pushing to prod are available at
https://goto.google.com/autotest-to-prod
"""
| 624 | |
| 625 | |
def _main(arguments):
    """Run test and promote repo branches if tests succeed.

    On failure, unfinished suite jobs are aborted unless continue_on_failure
    is set, and the exception is re-raised. The push DUTs are reverified at
    the end in every case.

    @param arguments: command line arguments.
    """
    # These are globals used by other functions in this module to communicate
    # back from worker processes.
    global _run_suite_output, _all_suite_ids

    # TODO Use chromite.lib.parallel.Manager instead, to workaround the
    # too-long-tmp-path problem.
    manager = multiprocessing.Manager()
    _run_suite_output = manager.list()
    _all_suite_ids = manager.list()

    try:
        start_time = datetime.datetime.now()
        reverify_all_push_duts()
        time.sleep(15)  # Wait for the verify test to start.
        check_dut_inventory(arguments.num_duts, arguments.pool)
        _run_test_suites(arguments)
        check_service_crash(arguments.service_respawn_limit, start_time)
        repo_heads = _promote_prod_next_refs()
        repo_lines = ['%s: %s' % head for head in repo_heads.iteritems()]
        print(_SUCCESS_MSG % {'updated_repos_msg': '\n'.join(repo_lines)})
    except Exception:
        # Abort running jobs when choose not to continue when there is failure.
        if not arguments.continue_on_failure:
            for suite_id in _all_suite_ids:
                if AFE.get_jobs(id=suite_id, finished=False):
                    AFE.run('abort_host_queue_entries', job=suite_id)
        raise
    finally:
        # Reverify all the hosts
        reverify_all_push_duts()
Dan Shi | 7e04fa8 | 2013-07-25 15:08:48 -0700 | [diff] [blame] | 663 | |
Dan Shi | 7e04fa8 | 2013-07-25 15:08:48 -0700 | [diff] [blame] | 664 | |
def main():
    """Entry point.

    Parses the command line, runs the test push and increments the
    'completed' counter with whether the run succeeded.
    """
    arguments = parse_arguments(sys.argv)
    ts_mon_state = ts_mon_config.SetupTsMonGlobalState(
            service_name='test_push', indirect=True)
    with ts_mon_state:
        succeeded = False
        try:
            _main(arguments)
            succeeded = True
        finally:
            # Reported on failure too; succeeded is still False if _main
            # raised.
            completed = metrics.Counter(
                    'chromeos/autotest/test_push/completed')
            completed.increment(fields={'success': succeeded})
| 677 | |
Shuqian Zhao | 5696954 | 2017-05-30 12:56:57 -0700 | [diff] [blame] | 678 | |
# Run the test push only when this file is executed as a script.
if __name__ == '__main__':
    main()