Chris Masone | 24b80f1 | 2012-02-14 14:18:01 -0800 | [diff] [blame] | 1 | #!/usr/bin/python |
| 2 | # |
| 3 | # Copyright (c) 2012 The Chromium OS Authors. All rights reserved. |
| 4 | # Use of this source code is governed by a BSD-style license that can be |
| 5 | # found in the LICENSE file. |
| 6 | |
| 7 | """Tool for running suites of tests and waiting for completion. |
| 8 | |
| 9 | The desired test suite will be scheduled with autotest, and then |
| 10 | this tool will block until the job is complete, printing a summary |
| 11 | at the end. Error conditions result in exceptions. |
| 12 | |
| 13 | This is intended for use only with Chrome OS test suites that leverage the |
| 14 | dynamic suite infrastructure in server/cros/dynamic_suite.py. |
| 15 | """ |
| 16 | |
Prashanth B | 923ca26 | 2014-03-14 12:36:29 -0700 | [diff] [blame^] | 17 | import datetime as datetime_base |
beeps | 6f02d19 | 2013-03-22 13:15:49 -0700 | [diff] [blame] | 18 | import getpass, hashlib, logging, optparse, os, re, sys, time |
Chris Masone | cfa7efc | 2012-09-06 16:00:07 -0700 | [diff] [blame] | 19 | from datetime import datetime |
| 20 | |
Chris Masone | 24b80f1 | 2012-02-14 14:18:01 -0800 | [diff] [blame] | 21 | import common |
Chris Masone | cfa7efc | 2012-09-06 16:00:07 -0700 | [diff] [blame] | 22 | |
J. Richard Barnette | 3cbd76b | 2013-11-27 12:11:25 -0800 | [diff] [blame] | 23 | from autotest_lib.client.common_lib import global_config, enum |
| 24 | from autotest_lib.client.common_lib import priorities |
Prashanth B | 923ca26 | 2014-03-14 12:36:29 -0700 | [diff] [blame^] | 25 | from autotest_lib.frontend.afe.json_rpc import proxy |
J. Richard Barnette | 3cbd76b | 2013-11-27 12:11:25 -0800 | [diff] [blame] | 26 | from autotest_lib.server import utils |
Chris Masone | 44e4d6c | 2012-08-15 14:25:53 -0700 | [diff] [blame] | 27 | from autotest_lib.server.cros.dynamic_suite import constants |
Chris Masone | b493555 | 2012-08-14 12:05:54 -0700 | [diff] [blame] | 28 | from autotest_lib.server.cros.dynamic_suite import frontend_wrappers |
| 29 | from autotest_lib.server.cros.dynamic_suite import job_status |
Prashanth B | 923ca26 | 2014-03-14 12:36:29 -0700 | [diff] [blame^] | 30 | from autotest_lib.server.cros.dynamic_suite import reporting_utils |
J. Richard Barnette | e7b98bb | 2013-08-21 16:34:16 -0700 | [diff] [blame] | 31 | from autotest_lib.server.cros.dynamic_suite import tools |
beeps | 6f02d19 | 2013-03-22 13:15:49 -0700 | [diff] [blame] | 32 | from autotest_lib.site_utils.graphite import stats |
Prashanth B | 923ca26 | 2014-03-14 12:36:29 -0700 | [diff] [blame^] | 33 | from autotest_lib.site_utils import diagnosis_utils |
Chris Masone | 24b80f1 | 2012-02-14 14:18:01 -0800 | [diff] [blame] | 34 | |
# Module-level alias for global_config.global_config; code in this file reads
# settings from it via CONFIG.get_config_value(section, key).
CONFIG = global_config.global_config

# Return code that will be sent back to autotest_rpc_server.py
RETURN_CODES = enum.Enum('OK', 'ERROR', 'WARNING')
Simran Basi | 22aa9fe | 2012-12-07 16:37:09 -0800 | [diff] [blame] | 39 | |
Chris Masone | dfa0beba | 2012-03-19 11:41:47 -0700 | [diff] [blame] | 40 | |
def setup_logging(logfile=None):
    """Configure the root logger to emit bare messages.

    Calls to logging will only show the message. No severity is logged.
    The root logger is set to INFO; when a logfile is given, a file handler
    (with its own level set to DEBUG) is attached as well.

    @param logfile: If specified dump output to a file as well.
    """
    root_logger = logging.getLogger()

    # Remove all existing handlers. client/common_lib/logging_config adds
    # a StreamHandler to logger when modules are imported, e.g.,
    # autotest_lib.client.bin.utils. A new StreamHandler will be added here to
    # log only messages, not severity.
    root_logger.handlers = []

    console = logging.StreamHandler()
    console.setFormatter(logging.Formatter('%(message)s'))
    root_logger.addHandler(console)
    root_logger.setLevel(logging.INFO)

    if not logfile:
        return

    # No formatter is set on the file handler, so it uses logging's default.
    to_file = logging.FileHandler(logfile)
    to_file.setLevel(logging.DEBUG)
    root_logger.addHandler(to_file)
Chris Masone | dfa0beba | 2012-03-19 11:41:47 -0700 | [diff] [blame] | 62 | |
| 63 | |
def parse_options():
    """Define this tool's command-line options and parse the command line.

    @return: A tuple (parser, options, args): the optparse.OptionParser that
             was built, followed by the options and positional arguments
             returned by its parse_args().
    """
    #pylint: disable-msg=C0111
    parser = optparse.OptionParser(usage="usage: %prog [options]")
    add = parser.add_option

    add("-b", "--board", dest="board")
    add("-i", "--build", dest="build")
    # This should just be a boolean flag, but the autotest "proxy" code
    # can't handle flags that don't take arguments.
    add("-n", "--no_wait", dest="no_wait", default="False",
        help='Must pass "True" or "False" if used.')
    # If you really want no pool, --pool="" will do it. USE WITH CARE.
    add("-p", "--pool", dest="pool", default="suites")
    add("-s", "--suite_name", dest="name")
    # NOTE: the three numeric options below declare no type, so a value given
    # on the command line arrives as a string while the default is an int.
    add("-a", "--afe_timeout_mins", dest="afe_timeout_mins", default=30)
    add("-t", "--timeout_mins", dest="timeout_mins", default=1440)
    add("-d", "--delay_sec", dest="delay_sec", default=10)
    add("-m", "--mock_job_id", dest="mock_job_id",
        help="Skips running suite; creates report for given ID.")
    add("-u", "--num", dest="num", type="int", default=None,
        help="Run on at most NUM machines.")
    # Same boolean flag issue applies here.
    add("-f", "--file_bugs", dest="file_bugs", default='False',
        help='File bugs on test failures. Must pass "True" or "False" '
             'if used.')
    add("-l", "--bypass_labstatus", dest="bypass_labstatus",
        action="store_true", help='Bypass lab status check.')
    # We allow either a number or a string for the priority. This way, if you
    # know what you're doing, one can specify a custom priority level between
    # other levels.
    add("-r", "--priority", dest="priority",
        default=priorities.Priority.DEFAULT,
        action="store", help="Priority of suite")
    add("--suite_args", dest="suite_args",
        default=None, action="store",
        help="Argument string for suite control file.")

    parsed_options, positional = parser.parse_args()
    return parser, parsed_options, positional
| 104 | |
| 105 | |
def get_pretty_status(status):
    """
    Map a status string onto the bracketed label used when printing results.

    @param status: Status to convert.

    @return: Returns pretty string.
             GOOD    -> [ PASSED ]
             TEST_NA -> [ INFO ]
             other   -> [ FAILED ]
    """
    labelled = (
        ('GOOD', '[ PASSED ]'),
        ('TEST_NA', '[ INFO ]'),
    )
    for known_status, label in labelled:
        if status == known_status:
            return label
    # Anything that is not explicitly recognised is reported as a failure.
    return '[ FAILED ]'
| 122 | |
def is_fail_status(status):
    """
    Check if the given status corresponds to a failure.

    @param status: The status to check. (string)

    @return: True if status is FAIL, ERROR or ABORT. False otherwise.
    """
    # All the statuses tests can have when they fail.
    return status in ('FAIL', 'ERROR', 'ABORT')
| 135 | |
Chris Masone | 24b80f1 | 2012-02-14 14:18:01 -0800 | [diff] [blame] | 136 | |
def get_view_info(suite_job_id, view, build, suite):
    """
    Parse a view for the slave job name and job_id.

    @param suite_job_id: The job id of our master suite job.
    @param view: Test result view. Must contain 'test_name'; 'job_keyvals'
                 is optional.
    @param build: build passed in via the -b option.
                  eg: lumpy-release/R28-3947.0.0
    @param suite: suite passed in via the -s option.
                  eg: dummy
    @return A tuple job_name, experimental, name of the slave test run
            described by view. eg:
            experimental_dummy_Pass fails: (1130-owner, True, dummy_Pass)
            experimental_dummy_Pass aborts: (1130-owner, True,
                                             experimental_dummy_Pass)
            dummy_Fail: (1130-owner, False, dummy_Fail.Error)
            When the view carries no 'job_keyvals', the name returned is
            the view's test_name with any '.tag' suffix removed.
    """
    # By default, we are the main suite job since there is no
    # keyval entry for our job_name.
    job_name = '%s-%s' % (suite_job_id, getpass.getuser())
    experimental = False
    test_name = ''
    # raw test name is the test_name from tko status view. tko_job_keyvals may
    # have a record of the hash of this name mapping to job_id-owner, which can
    # be used to reference the test to its job url. The change is made to
    # support tests in different jobs within a suite that shares the same test
    # class, e.g., AU suite.
    raw_test_name = view['test_name']
    # Fallback name for views without job_keyvals. Without this binding the
    # return statement below would hit an unbound local in that case.
    std_job_name = raw_test_name.split('.')[0]
    if 'job_keyvals' in view:
        # For a test invocation like:
        #   NAME = "dummy_Fail"
        #   job.run_test('dummy_Fail', tag='Error', to_throw='TestError')
        # we will:
        #   Record a keyval of the jobs test_name field: dummy_Fail
        #   On success, yield a tko status with the tagged name:
        #       dummy_Fail.Error
        #   On abort, yield a status (not from tko) with the job name:
        #       /build/suite/dummy_Fail.Error
        #   Note the last 2 options include the tag. The tag is separated
        #   from the rest of the name with a '.'. The tag or test name can
        #   also include a /, and we must isolate the tag before we compare it
        #   to the hashed keyval. Based on this we have the following cases:
        #   1. Regular test failure with or without a tag '.': std_job_name is
        #      set to the view test_name, after removing the tag.
        #   2. Regular test Aborts: we know that dynamic_suite inserted a name
        #      like build/suite/test.name (eg:
        #      lumpy-release/R28-3947.0.0/dummy/dummy_Fail.Error), so we
        #      intersect the build/suite/ string we already have with the
        #      test_name in the view. The name of the aborted test is
        #      instrumental in generating the job_name, which is used in
        #      creating a link to the logs.
        #   3. Experimental tests, Aborts and Failures: The test view
        #      corresponding to the afe_job_id of the suite job contains
        #      stubs for each test in this suite. The names of these jobs
        #      will contain an experimental prefix if they were aborted;
        #      If they failed the same names will not contain an experimental
        #      prefix but we would have hashed the name with a prefix. Eg:
        #          Test name = experimental_pass
        #          keyval contains: hash(experimental_pass)
        #          Fail/Pass view['test_name'] = pass
        #          Abort view['test_name'] = board/build/experimental_pass
        #      So we need to add the experimental prefix only if the test was
        #      aborted. Everything else is the same as [2].
        #   4. Experimental server job failures: eg verify passes, something on
        #      the DUT crashes, the experimental server job fails to ssh in. We
        #      need to manually set the experimental flag in this case because
        #      the server job name isn't recorded in the keyvals. For a normal
        #      suite the views will contain: SERVER_JOB, try_new_image,
        #      test_name. i.e the test server jobs should be handled
        #      transparently and only the suite server job should appear in the
        #      view. If a server job fails (for an experimental test or
        #      otherwise) we insert the server job entry into the tko database
        #      instead. Put more generally we insert the last stage we knew
        #      about into the db record associated with that suites afe_job_id.
        #      This could lead to a view containing:
        #      SERVER_JOB, try_new_image,
        #      lumpy-release/R28-4008.0.0/bvt/experimental_pass_SERVER_JOB.
        # Neither of these operations will stomp on a pristine string.
        test_name = tools.get_test_name(build, suite, view['test_name'])
        std_job_name = test_name.split('.')[0]

        if (job_status.view_is_for_infrastructure_fail(view) and
                std_job_name.startswith(constants.EXPERIMENTAL_PREFIX)):
            experimental = True

        if std_job_name.startswith(constants.EXPERIMENTAL_PREFIX):
            exp_job_name = std_job_name
        else:
            exp_job_name = constants.EXPERIMENTAL_PREFIX + std_job_name
        std_job_hash = hashlib.md5(std_job_name).hexdigest()
        exp_job_hash = hashlib.md5(exp_job_name).hexdigest()
        raw_test_name_hash = hashlib.md5(raw_test_name).hexdigest()

        # In the experimental abort case both these clauses can evaluate
        # to True. The checks are deliberately sequential: a later match
        # overrides the job_name chosen by an earlier one.
        if std_job_hash in view['job_keyvals']:
            job_name = view['job_keyvals'][std_job_hash]
        if exp_job_hash in view['job_keyvals']:
            experimental = True
            job_name = view['job_keyvals'][exp_job_hash]
        if raw_test_name_hash in view['job_keyvals']:
            job_name = view['job_keyvals'][raw_test_name_hash]

    # If the name being returned is the test name it needs to include the tag
    return job_name, experimental, test_name or std_job_name
Zdenek Behan | 150fbd6 | 2012-04-06 17:20:01 +0200 | [diff] [blame] | 241 | |
| 242 | |
class LogLink(object):
    """A named link to be recorded in the logs.

    Depending on context and the information provided at
    construction time, the link may point to either to log files for
    a job, or to a bug filed for a failure in the job.

    @var anchor     The link text.
    @var url        The link url.
    @var reason     Failure reason appended to the buildbot link text,
                    or None.
    @var bug_id     Id of a bug to link to, or None.
    @var bug_count  Count reported alongside the bug, or None.
    """

    # Both values are read from the global config once, at class creation.
    _BUG_URL_PREFIX = CONFIG.get_config_value('BUG_REPORTING', 'tracker_url')
    _URL_PATTERN = CONFIG.get_config_value('CROS', 'log_url_pattern',
                                           type=str)


    def __init__(self, anchor, server, job_string, bug_info=None, reason=None):
        """Initialize the LogLink by generating the log URL.

        @param anchor      The link text.
        @param server      The hostname of the server this suite ran on.
        @param job_string  The job whose logs we'd like to link to.
        @param bug_info    Info about the bug, if one was filed; unpacked
                           as a (bug_id, bug_count) pair.
        @param reason      A string representing the reason of failure if any.
        """
        self.anchor = anchor
        self.url = self._URL_PATTERN % (server, job_string)
        self.reason = reason
        self.bug_id, self.bug_count = bug_info if bug_info else (None, None)


    def GenerateBuildbotLink(self):
        """Generate a link formatted to meet buildbot expectations.

        If there is a bug associated with this link, report that;
        otherwise report a link to the job logs.

        @return A link formatted for the buildbot log annotator.
        """
        label = self.anchor.strip()
        if self.bug_id:
            target = '%s%s' % (self._BUG_URL_PREFIX, self.bug_id)
            if self.bug_count is None:
                label = '%s (Unknown number of reports)' % label
            elif self.bug_count == 1:
                label = '%s (new)' % label
            else:
                label = '%s (%s reports)' % (label, self.bug_count)
        else:
            target = self.url

        if self.reason:
            label = '%s - %s' % (label, self.reason)

        return '@@@STEP_LINK@%s@%s@@@'% (label, target)


    def GenerateTextLink(self):
        """Generate a link to the job's logs, for consumption by a human.

        @return The anchor text immediately followed by the log url.
        """
        pieces = (self.anchor, self.url)
        return '%s%s' % pieces
Craig Harrison | 25eb0f3 | 2012-08-23 16:48:49 -0700 | [diff] [blame] | 314 | |
| 315 | |
class Timings(object):
    """Timings for important events during a suite.

    All timestamps are datetime.datetime objects, or None when the
    corresponding event has not been recorded.

    @var download_start_time: parsed from the DOWNLOAD_STARTED_TIME keyval.
    @var payload_end_time: parsed from the PAYLOAD_FINISHED_TIME keyval.
    @var artifact_end_time: parsed from the ARTIFACT_FINISHED_TIME keyval.
    @var suite_start_time: the time the suite started.
    @var tests_start_time: the time the first test started running.
    @var tests_end_time: the time the last test finished running.
    """

    # Recorded in create_suite_job as we're staging the components of a
    # build on the devserver. Only the artifacts necessary to start
    # installing images onto DUT's will be staged when we record
    # payload_end_time, the remaining artifacts are downloaded after we kick
    # off the reimaging job, at which point we record artifact_end_time.
    download_start_time = None
    payload_end_time = None
    artifact_end_time = None

    # The test_start_time, but taken off the view that corresponds to the
    # suite instead of an individual test.
    suite_start_time = None

    # Earliest and Latest tests in the set of TestViews passed to us.
    tests_start_time = None
    tests_end_time = None


    def _GetDatetime(self, timing_string, timing_string_format):
        """
        Parses the timing_string according to the timing_string_format.

        @param timing_string: A datetime timing string, or None.
        @param timing_string_format: Format of the time in timing_string.
        @return: A datetime object for the given timing string, or None if
                 timing_string is missing (not a string) or does not match
                 timing_string_format.
        """
        try:
            return datetime.strptime(timing_string, timing_string_format)
        except (TypeError, ValueError):
            # TypeError: the value is absent (e.g. None from keyvals.get()).
            # ValueError: the value is a string that does not match the
            # format, i.e. a corrupt timestamp.
            return None


    def RecordTiming(self, view):
        """Given a test report view, extract and record pertinent time info.

        get_detailed_test_views() returns a list of entries that provide
        info about the various parts of a suite run.  This method can take
        any one of these entries and look up timestamp info we might want
        and record it.

        If timestamps are unavailable, datetime.datetime.min/max will be used.

        @param view: a view dict, as returned by get_detailed_test_views().
                     Must contain 'test_started_time' and
                     'test_finished_time'; 'job_keyvals' is optional.
        """
        start_candidate = datetime.min
        end_candidate = datetime.max
        if view['test_started_time']:
            start_candidate = datetime.strptime(view['test_started_time'],
                                                job_status.TIME_FMT)
        if view['test_finished_time']:
            end_candidate = datetime.strptime(view['test_finished_time'],
                                              job_status.TIME_FMT)

        if job_status.view_is_for_suite_prep(view):
            self.suite_start_time = start_candidate
        else:
            self._UpdateFirstTestStartTime(start_candidate)
            self._UpdateLastTestEndTime(end_candidate)
        if 'job_keyvals' in view:
            keyvals = view['job_keyvals']
            # Each of these is overwritten on every view that carries
            # keyvals; a missing or unparsable entry yields None.
            self.download_start_time = self._GetDatetime(
                    keyvals.get(constants.DOWNLOAD_STARTED_TIME),
                    job_status.TIME_FMT)

            self.payload_end_time = self._GetDatetime(
                    keyvals.get(constants.PAYLOAD_FINISHED_TIME),
                    job_status.TIME_FMT)

            self.artifact_end_time = self._GetDatetime(
                    keyvals.get(constants.ARTIFACT_FINISHED_TIME),
                    job_status.TIME_FMT)


    def _UpdateFirstTestStartTime(self, candidate):
        """Update self.tests_start_time, iff candidate is an earlier time.

        @param candidate: a datetime.datetime object.
        """
        if not self.tests_start_time or candidate < self.tests_start_time:
            self.tests_start_time = candidate


    def _UpdateLastTestEndTime(self, candidate):
        """Update self.tests_end_time, iff candidate is a later time.

        @param candidate: a datetime.datetime object.
        """
        if not self.tests_end_time or candidate > self.tests_end_time:
            self.tests_end_time = candidate


    def __str__(self):
        """Return a multi-line, human readable summary of all timings."""
        return ('\n'
                'Suite timings:\n'
                'Downloads started at %s\n'
                'Payload downloads ended at %s\n'
                'Suite started at %s\n'
                'Artifact downloads ended (at latest) at %s\n'
                'Testing started at %s\n'
                'Testing ended at %s\n' % (self.download_start_time,
                                           self.payload_end_time,
                                           self.suite_start_time,
                                           self.artifact_end_time,
                                           self.tests_start_time,
                                           self.tests_end_time))


    def _GetDataKeyForStatsd(self, suite, build, board):
        """
        Constructs the key used for logging statsd timing data.

        @param suite: scheduled suite that we want to record the results of.
        @param build: The build string. This string should have a consistent
            format eg: x86-mario-release/R26-3570.0.0. If the format of this
            string changes such that we can't determine build_type or branch
            we give up and use the parameters we're sure of instead (suite,
            board). eg:
                1. build = x86-alex-pgo-release/R26-3570.0.0
                   branch = 26
                   build_type = pgo-release
                2. build = lumpy-paladin/R28-3993.0.0-rc5
                   branch = 28
                   build_type = paladin
        @param board: The board that this suite ran on.
        @return: The key used to log timing information in statsd, of the
            form run_suite.<board>.<build_type>.<branch>.<suite>.
        """
        try:
            _board, build_type, branch = utils.ParseBuildName(build)[:3]
        except utils.ParseBuildNameException as e:
            logging.error(str(e))
            branch = 'Unknown'
            build_type = 'Unknown'
        else:
            # For x86-<name>-<extra> boards, fold <extra> into the build
            # type (see example 1 in the docstring).
            embedded_str = re.search(r'x86-\w+-(.*)', _board)
            if embedded_str:
                build_type = embedded_str.group(1) + '-' + build_type

        data_key_dict = {
            'board': board,
            'branch': branch,
            'build_type': build_type,
            'suite': suite,
        }
        return ('run_suite.%(board)s.%(build_type)s.%(branch)s.%(suite)s'
                % data_key_dict)


    def SendResultsToStatsd(self, suite, build, board):
        """
        Sends data to statsd.

        1. Makes a data_key of the form:
           run_suite.$board.$build_type.$branch.$suite
        2. Computes timings for several start and end event pairs.
        3. Sends all timing values to statsd.

        Pairs with a missing start or end timestamp are skipped.

        @param suite: scheduled suite that we want to record the results of.
        @param build: the build that this suite ran on.
            eg: 'lumpy-release/R26-3570.0.0'
        @param board: the board that this suite ran on.
        """
        # timedelta.total_seconds(), used below, was added in python 2.7.
        if sys.version_info < (2, 7):
            logging.error('Sending run_suite perf data to statsd requires '
                          'python 2.7 or greater.')
            return

        data_key = self._GetDataKeyForStatsd(suite, build, board)

        # Since we don't want to try subtracting corrupted datetime values
        # _GetDatetime returns None for anything it cannot parse. This means
        # that even if, say, keyvals.get(constants.ARTIFACT_FINISHED_TIME)
        # returns a corrupt value the member artifact_end_time is set to None.
        if self.download_start_time:
            if self.payload_end_time:
                stats.Timer(data_key).send('payload_download_time',
                    (self.payload_end_time -
                     self.download_start_time).total_seconds())

            if self.artifact_end_time:
                stats.Timer(data_key).send('artifact_download_time',
                    (self.artifact_end_time -
                     self.download_start_time).total_seconds())

        if self.tests_end_time:
            if self.suite_start_time:
                stats.Timer(data_key).send('suite_run_time',
                    (self.tests_end_time -
                     self.suite_start_time).total_seconds())

            if self.tests_start_time:
                stats.Timer(data_key).send('tests_run_time',
                    (self.tests_end_time -
                     self.tests_start_time).total_seconds())
| 518 | |
beeps | 6f02d19 | 2013-03-22 13:15:49 -0700 | [diff] [blame] | 519 | |
def _full_test_name(job_id, view, build, suite):
    """
    Builds the test name used when printing to logs and when generating a
    link to the results.

    @param job_id: the job id.
    @param view: the view for which we are generating the name.
    @param build: the build for this invocation of run_suite.
    @param suite: the suite for this invocation of run_suite.
    @return The test name, with constants.EXPERIMENTAL_PREFIX prepended when
            the test is experimental and the name does not carry it yet.
    """
    view_info = get_view_info(job_id, view, build, suite)
    is_experimental, name = view_info[1:]

    # For an aborted experimental test get_view_info already returns a name
    # that includes the prefix, so only add it when it is missing.
    if is_experimental and not name.startswith(constants.EXPERIMENTAL_PREFIX):
        return constants.EXPERIMENTAL_PREFIX + name
    return '' + name
Craig Harrison | d845157 | 2012-08-31 10:29:33 -0700 | [diff] [blame] | 538 | |
| 539 | |
# Default autotest server: the 'hostname' value of the 'SERVER' config
# section, read as a string through CONFIG.
_DEFAULT_AUTOTEST_INSTANCE = CONFIG.get_config_value(
        'SERVER', 'hostname', type=str)
| 542 | |
| 543 | |
def instance_for_pool(pool_name):
    """
    Look up the hostname of the server that should service a suite for the
    specified pool.

    @param pool_name: The pool (without the 'pool:' prefix) to schedule the
                      suite against.
    @return: The POOL_INSTANCE_SHARDING config lookup for pool_name, with
             _DEFAULT_AUTOTEST_INSTANCE passed as the default.
    """
    fallback_server = _DEFAULT_AUTOTEST_INSTANCE
    server = CONFIG.get_config_value('POOL_INSTANCE_SHARDING', pool_name,
                                     default=fallback_server)
    return server
| 555 | |
| 556 | |
def main():
    """
    Entry point for run_suite script.

    Validates the command line options, creates the suite job through the
    AFE (or reuses options.mock_job_id), and, when options.no_wait is
    'False', polls until the job is finished, logs per-test status and
    links, and derives the return code from the test statuses.

    @return: A RETURN_CODES value on the normal paths. None when option
             validation fails (sys.exit treats None as exit status 0).
    """
    # print is always called with one parenthesised argument so that the
    # statements behave the same under the python 2 print statement.
    parser, options, args = parse_options()
    log_name = 'run_suite-default.log'
    if not options.mock_job_id:
        if args:
            print('Unknown arguments: ' + str(args))
            parser.print_help()
            return
        required_options = (
                (options.build, 'Need to specify which build to use'),
                (options.board, 'Need to specify board'),
                (options.name, 'Need to specify suite name'),
        )
        for value, complaint in required_options:
            if not value:
                print(complaint)
                parser.print_help()
                return
        # convert build name from containing / to containing only _
        log_name = 'run_suite-%s.log' % options.build.replace('/', '_')
        log_dir = os.path.join(common.autotest_dir, 'logs')
        if os.path.exists(log_dir):
            log_name = os.path.join(log_dir, log_name)
    if options.num is not None and options.num < 1:
        print('Number of machines must be more than 0, if specified.')
        parser.print_help()
        return
    if options.no_wait not in ('True', 'False'):
        print('Please specify "True" or "False" for --no_wait.')
        parser.print_help()
        return
    if options.file_bugs not in ('True', 'False'):
        print('Please specify "True" or "False" for --file_bugs.')
        parser.print_help()
        return

    # The priority may be given either as a number or as a level name.
    try:
        priority = int(options.priority)
    except ValueError:
        try:
            priority = priorities.Priority.get_value(options.priority)
        except AttributeError:
            print('Unknown priority level %s. Try one of %s.' % (
                  options.priority, ', '.join(priorities.Priority.names)))
            # Bail out like the other validation failures; falling through
            # would leave `priority` unbound for the create_suite_job call.
            parser.print_help()
            return

    setup_logging(logfile=log_name)

    try:
        if not options.bypass_labstatus:
            utils.check_lab_status(options.build)
    except utils.TestLabException as e:
        logging.warning('Error Message: %s', e)
        return RETURN_CODES.WARNING

    instance_server = instance_for_pool(options.pool)
    afe = frontend_wrappers.RetryingAFE(server=instance_server,
                                        timeout_min=options.afe_timeout_mins,
                                        delay_sec=options.delay_sec)
    logging.info('Autotest instance: %s', instance_server)

    wait = options.no_wait == 'False'
    file_bugs = options.file_bugs == 'True'
    logging.info('%s Submitted create_suite_job rpc',
                 diagnosis_utils.JobTimer.format_time(datetime.now()))
    if options.mock_job_id:
        job_id = int(options.mock_job_id)
    else:
        job_id = afe.run('create_suite_job', suite_name=options.name,
                         board=options.board, build=options.build,
                         check_hosts=wait, pool=options.pool, num=options.num,
                         file_bugs=file_bugs, priority=priority,
                         suite_args=options.suite_args,
                         wait_for_results=wait,
                         timeout_mins=options.timeout_mins)
    job_timer = diagnosis_utils.JobTimer(
            time.time(), float(options.timeout_mins))
    logging.info('%s Created suite job: %s',
                 job_timer.format_time(job_timer.job_created_time),
                 reporting_utils.link_job(
                         job_id, instance_server=instance_server))

    TKO = frontend_wrappers.RetryingTKO(server=instance_server,
                                        timeout_min=options.afe_timeout_mins,
                                        delay_sec=options.delay_sec)
    code = RETURN_CODES.OK
    rpc_helper = diagnosis_utils.RPCHelper(afe)
    if wait:
        while not afe.get_jobs(id=job_id, finished=True):
            # Note that this call logs output, preventing buildbot's
            # 9000 second silent timeout from kicking in. Let there be no
            # doubt, this is a hack. The timeout is from upstream buildbot and
            # this is the easiest work around.
            if job_timer.first_past_halftime():
                rpc_helper.diagnose_job(job_id)
                logging.info('The suite job has another %s till timeout \n',
                             job_timer.timeout_hours -
                             job_timer.elapsed_time())
            time.sleep(10)

        views = TKO.run('get_detailed_test_views', afe_job_id=job_id)
        # The intended behavior is to refrain from recording stats if the suite
        # was aborted (either by a user or through the golo rpc). Since all the
        # views associated with the afe_job_id of the suite contain the keyvals
        # of the suite and not the individual tests themselves, we can achieve
        # this without digging through the views.
        is_aborted = any(view['job_keyvals'].get('aborted_by')
                         for view in views)
        # For hostless job in Starting status, there is no test view associated.
        # This can happen when a suite job in Starting status is aborted. When
        # the scheduler hits some limit, e.g., max_hostless_jobs_per_drone,
        # max_jobs_started_per_cycle, a suite job can stay in Starting status.
        if not views:
            code = RETURN_CODES.ERROR
            returnmessage = RETURN_CODES.get_string(code)
            logging.info('\nNo test view was found.\n'
                         'Will return from run_suite with status: %s',
                         returnmessage)
            return code

        width = max(len(_full_test_name(job_id, view, options.build,
                                        options.name)) for view in views) + 3

        # A real list (not a lazy filter object) so the emptiness test below
        # is meaningful.
        relevant_views = [view for view in views
                          if job_status.view_is_relevant(view)]
        if not relevant_views:
            # The main suite job most likely failed in SERVER_JOB.
            relevant_views = views

        timings = Timings()
        web_links = []
        buildbot_links = []
        for view in relevant_views:
            timings.RecordTiming(view)
            if job_status.view_is_for_suite_prep(view):
                view['test_name'] = 'Suite prep'

            job_name, experimental = get_view_info(job_id, view, options.build,
                                                   options.name)[:2]
            test_view = _full_test_name(job_id, view, options.build,
                                        options.name).ljust(width)
            logging.info("%s%s", test_view, get_pretty_status(view['status']))

            # It's important that we use the test name in the view
            # and not the name returned by full_test_name, as this
            # was the name inserted after the test ran, e.g. for an
            # aborted test full_test_name will return
            # 'experimental_testname' but the view and the bug_id
            # keyval will use '/build/suite/experimental_testname'.
            bug_info = tools.get_test_failure_bug_info(
                    view['job_keyvals'], view['test_name'])

            link = LogLink(test_view, instance_server, job_name,
                           bug_info)
            web_links.append(link)

            # Don't show links on the buildbot waterfall for tests with
            # GOOD status.
            if view['status'] != 'GOOD':
                logging.info("%s %s: %s", test_view, view['status'],
                             view['reason'])
                link.reason = '%s: %s' % (view['status'], view['reason'])
                if view['status'] == 'TEST_NA':
                    # Didn't run; nothing to do here!
                    continue
                buildbot_links.append(link)
            if code == RETURN_CODES.ERROR:
                # Failed already, no need to worry further.
                continue

            # Any non experimental test that has a status other than WARN
            # or GOOD will result in the tree closing. Experimental tests
            # will not close the tree, even if they have been aborted.
            if not experimental:
                if view['status'] == 'WARN':
                    code = RETURN_CODES.WARNING
                elif is_fail_status(view['status']):
                    code = RETURN_CODES.ERROR

        # Do not record stats for aborted suites.
        if not is_aborted and not options.mock_job_id:
            timings.SendResultsToStatsd(options.name, options.build,
                                        options.board)
        logging.info(timings)
        logging.info('\n'
                     'Links to test logs:')
        for link in web_links:
            logging.info(link.GenerateTextLink())

        try:
            returnmessage = RETURN_CODES.get_string(code)
        except ValueError:
            returnmessage = 'UNKNOWN'
        logging.info('\n'
                     'Will return from run_suite with status: %s',
                     returnmessage)

        # There is a minor race condition here where we might have aborted for
        # some reason other than a timeout, and the job_timer thinks it's a
        # timeout because of the jitter in waiting for results. This shouldn't
        # harm us since all diagnose_pool does is log information about a pool.
        if job_timer.is_suite_timeout():
            logging.info('\nAttempting to diagnose pool: %s', options.pool)
            try:
                # Add some jitter to make up for any latency in
                # aborting the suite or checking for results.
                cutoff = (job_timer.timeout_hours +
                          datetime_base.timedelta(hours=0.3))
                rpc_helper.diagnose_pool(
                        options.board, options.pool, cutoff)
            except proxy.JSONRPCException as e:
                # Diagnosis is best effort, but keep the failure detail.
                logging.warning('Unable to diagnose suite abort: %s', e)

        logging.info('\n'
                     'Output below this line is for buildbot consumption:')
        for link in buildbot_links:
            logging.info(link.GenerateBuildbotLink())
    else:
        logging.info('Created suite job: %r', job_id)
        link = LogLink(options.name, instance_server,
                       '%s-%s' % (job_id, getpass.getuser()))
        logging.info(link.GenerateBuildbotLink())
        logging.info('--no_wait specified; Exiting.')
    return code
| 783 | |
# When executed as a script, run main() and use its return value as the
# process exit status.
if __name__ == "__main__":
    sys.exit(main())